diff options
Diffstat (limited to 'src/gallium')
69 files changed, 3099 insertions, 424 deletions
diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h new file mode 100644 index 00000000000..9e007de1ada --- /dev/null +++ b/src/gallium/auxiliary/util/u_fifo.h @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_FIFO_H +#define U_FIFO_H + +#include "util/u_memory.h" + +struct util_fifo +{ + size_t head; + size_t tail; + size_t num; + size_t size; +}; + +static INLINE struct util_fifo * +u_fifo_create(size_t size) +{ + struct util_fifo *fifo; + fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*)); + + fifo->head = 0; + fifo->tail = 0; + fifo->num = 0; + fifo->size = size; + + return fifo; +} + +static INLINE boolean +u_fifo_add(struct util_fifo *fifo, void *ptr) +{ + void **array = (void**)&fifo[1]; + if (fifo->num >= fifo->size) + return FALSE; + + if (++fifo->head >= fifo->size) + fifo->head = 0; + + array[fifo->head] = ptr; + + ++fifo->num; + + return TRUE; +} + +static INLINE boolean +u_fifo_pop(struct util_fifo *fifo, void **ptr) +{ + void **array = (void**)&fifo[1]; + + if (!fifo->num) + return FALSE; + + if (++fifo->tail >= fifo->size) + fifo->tail = 0; + + *ptr = array[fifo->tail]; + + ++fifo->num; + + return TRUE; +} + +static INLINE void +u_fifo_destroy(struct util_fifo *fifo) +{ + FREE(fifo); +} + +#endif diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 00a46d0cc48..6e82983e586 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1, PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs -PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs +PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs +PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs +PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 4c6c2bc00e1..b12c97dfb4d 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -341,6 +341,16 @@ util_is_inf_or_nan(float x) /** + * Test whether x is a power of two. + */ +static INLINE boolean +util_is_pot(unsigned x) +{ + return (x & (x - 1)) == 0; +} + + +/** * Find first bit set in word. Least significant bit is 1. * Return 0 if no bits set. */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 1235a67d264..0d6489c26e4 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src, pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float(0xff); + pRow[3] = 1.0F; } p += dst_stride; } @@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst, +/*** PIPE_FORMAT_R8G8B8_UNORM ***/ + +static void +r8g8b8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = ubyte_to_float(src[0]); + pRow[1] = ubyte_to_float(src[1]); + pRow[2] = ubyte_to_float(src[2]); + pRow[3] = 1.0f; + src += 3; + } + p += dst_stride; + } +} + + +static void +r8g8b8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + dst[0] = float_to_ubyte(pRow[0]); + dst[1] = float_to_ubyte(pRow[1]); + dst[2] = float_to_ubyte(pRow[2]); + dst += 3; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_Z16_UNORM ***/ /** @@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; @@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_R8G8B8_UNORM: + r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: assert(0); break; diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 508f4560e48..b3a7774fd6a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -44,6 +44,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_fifo.h" #include "i915_context.h" #include "i915_reg.h" @@ -76,8 +77,13 @@ struct i915_vbuf_render { size_t vbo_size; size_t vbo_offset; void *vbo_ptr; - size_t vbo_alloc_size; size_t vbo_max_used; + + /* stuff for the pool */ + struct util_fifo *pool_fifo; + unsigned pool_used; + unsigned pool_buffer_size; + boolean pool_not_used; }; @@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) } static boolean +i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + + if (i915_render->vbo_size < size + i915_render->vbo_offset) + return FALSE; + + if (i915->vbo_flushed) + return FALSE; + + return TRUE; +} + +static void +i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915_render->vbo) { + if (i915_render->pool_not_used) + iws->buffer_destroy(iws, i915_render->vbo); + else + u_fifo_add(i915_render->pool_fifo, i915_render->vbo); + i915_render->vbo = NULL; + } + + i915->vbo_flushed = 0; + + i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_offset = 0; + + if (i915_render->vbo_size != i915_render->pool_buffer_size) { + i915_render->pool_not_used = TRUE; + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, + INTEL_NEW_VERTEX); + } else { + i915_render->pool_not_used = FALSE; + + if (i915_render->pool_used >= 2) { + FLUSH_BATCH(NULL); + i915->vbo_flushed = 0; + i915_render->pool_used = 0; + } + u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); + } +} + +static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ assert(!i915->vbo); - if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { - } else { - i915->vbo_flushed = 0; - if (i915_render->vbo) { - iws->buffer_destroy(iws, i915_render->vbo); - i915_render->vbo = NULL; - } - } + if (!i915_vbuf_render_reserve(i915_render, size)) { - if (!i915_render->vbo) { - i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + if (i915->vbo_flushed) + i915_render->pool_used = 0; + i915_vbuf_render_new_buf(i915_render, size); } i915_render->vertex_size = vertex_size; @@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); struct intel_winsys *iws = i915->iws; + int i; i915_render->i915 = i915; @@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - i915_render->vbo_alloc_size = 128 * 4096; - i915_render->vbo_size = i915_render->vbo_alloc_size; + + i915_render->vbo = NULL; + i915_render->vbo_size = 0; i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + + i915_render->pool_used = FALSE; + i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_fifo = u_fifo_create(6); + for (i = 0; i < 6; i++) + u_fifo_add(i915_render->pool_fifo, + iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + INTEL_NEW_VERTEX)); + +#if 0 /* TODO JB: is this realy needed? */ i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); iws->buffer_unmap(iws, i915_render->vbo); +#endif return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h index f949f52a9ce..42c5e7470ec 100644 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ b/src/gallium/drivers/i915simple/intel_winsys.h @@ -150,6 +150,17 @@ struct intel_winsys { void (*buffer_unmap)(struct intel_winsys *iws, struct intel_buffer *buffer); + /** + * Write to a buffer. + * + * Arguments follows pwrite(2) + */ + int (*buffer_write)(struct intel_winsys *iws, + struct intel_buffer *dst, + const void *src, + size_t size, + size_t offset); + void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6e63a0c2b76..06c586e6bb9 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -15,9 +15,11 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ lp_bld_tgsi_soa.c \ @@ -44,7 +46,8 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample.c \ + lp_tex_sample_c.c \ + lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5c29bdac56e..dea4b703c45 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if 'LLVM_VERSION' not in env: +if env.has_key('LLVM_VERSION') is False: print 'warning: LLVM not found: not building llvmpipe' Return() @@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', 'lp_bld_swizzle.c', @@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample.c', + 'lp_tex_sample_c.c', + 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d5..be7442d00ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. @@ -566,20 +591,31 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? */ + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.width*type.length == 128) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } #endif @@ -587,6 +623,89 @@ lp_build_abs(struct lp_build_context *bld, } +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const union lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const union lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index fc8cb25966e..383c3c3313b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. + */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + +LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -86,6 +106,18 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 30925b5f415..59d8f492e60 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -30,10 +30,27 @@ #include <udis86.h> #endif +#include "util/u_math.h" #include "util/u_debug.h" #include "lp_bld_debug.h" +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + void lp_disassemble(const void* func) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h index ecdafef76d0..583e6132b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h @@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...) } +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + void lp_disassemble(const void* func); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2cd6e6b9217..3f88a14b5d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder, padding_right = 0; for(chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); if(padding_left || padding_right) { - const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1; - const long long mask_right = ((long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right); + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); } if(padding_left) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 9d99e1a9d9f..69ed014ff3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -32,59 +32,261 @@ */ #include "util/u_debug.h" +#include "util/u_memory.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, +}; + + +/** + * Variable declaration scope. + */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; +}; + + +/** + * Element of the flow construct stack. + */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. + */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + void -lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, - LLVMValueRef value) +lp_build_flow_destroy(struct lp_build_flow_context *flow) { - memset(mask, 0, sizeof *mask); + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} - mask->builder = builder; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; } +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; +} + + +/** + * Begin a variable scope. + * + * + */ void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { + struct lp_build_flow_scope *scope; - LLVMValueRef cond; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; + scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(mask->value) - mask->value = LLVMBuildAnd(mask->builder, mask->value, value, ""); - else - mask->value = value; + scope->num_variables = 0; +} - /* FIXME: disabled until we have proper control flow helpers */ -#if 0 - cond = LLVMBuildICmp(mask->builder, - LLVMIntEQ, - LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - current_block = LLVMGetInsertBlock(mask->builder); +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are mutiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where their + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. + * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. + * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(!mask->skip_block) { - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - mask->skip_block = LLVMAppendBasicBlock(function, "skip"); + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} + + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), ""); + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; } + flow->num_variables -= scope->num_variables; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + current_block = LLVMGetInsertBlock(flow->builder); + next_block = LLVMGetNextBasicBlock(current_block); - assert(next_block); if(next_block) { new_block = LLVMInsertBasicBlock(next_block, ""); } @@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask, new_block = LLVMAppendBasicBlock(function, ""); } - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block); + return new_block; +} + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } - LLVMPositionBuilderAtEnd(mask->builder, new_block); -#endif + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); + + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); } -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) { - if(mask->skip_block) { - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder); + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildBr(mask->builder, mask->skip_block); + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; - LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block); + current_block = LLVMGetInsertBlock(flow->builder); - mask->value = mask->phi; - mask->phi = NULL; - mask->skip_block = NULL; + new_block = lp_build_flow_insert_block(flow); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); } + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); + } + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; + + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; + } + + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + union lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); +} + + +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + + lp_build_mask_check(mask); +} + + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); return mask->value; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1b634ff038d..9d76e3064dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -41,23 +41,49 @@ union lp_type; +struct lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); + + struct lp_build_mask_context { - LLVMBuilderRef builder; + struct lp_build_flow_context *flow; LLVMTypeRef reg_type; LLVMValueRef value; - - LLVMValueRef phi; - - LLVMBasicBlockRef skip_block; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, + struct lp_build_flow_context *flow, union lp_type type, LLVMValueRef value); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 01c8a752d18..5ee06560932 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -31,20 +31,14 @@ /** * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. + * Pixel format helpers. */ #include <llvm-c/Core.h> - -#include "pipe/p_format.h" +#include "pipe/p_format.h" +struct util_format_description; union lp_type; @@ -56,9 +50,9 @@ union lp_type; * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed); /** @@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba); /** @@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr); /** @@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba); #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index dcbc0076c7d..b9b5d84bed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -32,9 +32,9 @@ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed) +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed) { const struct util_format_description *desc; LLVMTypeRef type; @@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba) +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr) { const struct util_format_description *desc; LLVMTypeRef type; @@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, ""); - return lp_build_unpack_rgba(builder, format, packed); + return lp_build_unpack_rgba_aos(builder, format, packed); } void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder, type = LLVMIntType(desc->block.bits); - packed = lp_build_pack_rgba(builder, format, rgba); + packed = lp_build_pack_rgba_aos(builder, format, rgba); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c new file mode 100644 index 00000000000..569e8d10a31 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +static LLVMValueRef +lp_build_format_swizzle(union lp_type type, + const LLVMValueRef *inputs, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return inputs[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + unsigned start; + unsigned chan; + + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); + rgba[2] = rgba[1] = rgba[0] = depth; + rgba[3] = lp_build_one(type); + } + else { + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); + } + } +} + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + union lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + LLVMValueRef packed; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h new file mode 100644 index 00000000000..6f565af76d1 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#ifndef LP_BLD_SAMPLE_H +#define LP_BLD_SAMPLE_H + + +#include <llvm-c/Core.h> + +struct pipe_texture; +struct pipe_sampler_state; +union lp_type; + + +/** + * Sampler static state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are embedded in the generated code. + */ +struct lp_sampler_static_state +{ + /* pipe_texture's state */ + enum pipe_format format; + unsigned target:2; + unsigned pot_width:1; + unsigned pot_height:1; + unsigned pot_depth:1; + + /* pipe_sampler_state's state */ + unsigned wrap_s:3; + unsigned wrap_t:3; + unsigned wrap_r:3; + unsigned min_img_filter:2; + unsigned min_mip_filter:2; + unsigned mag_img_filter:2; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned normalized_coords:1; + unsigned prefilter:4; +}; + + +/** + * Sampler dynamic state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are computed in runtime. + * + * There are obtained through callbacks, as we don't want to tie the texture + * sampling code generation logic to any particular texture layout or pipe + * driver. + */ +struct lp_sampler_dynamic_state +{ + + /** Obtain the base texture width. */ + LLVMValueRef + (*width)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the base texture height. */ + LLVMValueRef + (*height)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*data_ptr)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + +}; + + +/** + * Derive the sampler static state. + */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c new file mode 100644 index 00000000000..2913b17d3f8 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -0,0 +1,404 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + + +/** + * Keep all information for sampling code generation in a single place. + */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + union lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + union lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + union lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); + + if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); + + x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(int_coord_bld, x, x_stride); + y_offset = lp_build_mul(int_coord_bld, y, y_stride); + + offset = lp_build_add(int_coord_bld, x_offset, y_offset); + } + + lp_build_load_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, + offset, + texel); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_int(&bld->coord_bld, s_ipart); + y0 = lp_build_int(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(!bld->static_state->compare_mode) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; + } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + } + + lp_build_sample_compare(&bld, p, texel); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c index 14d2b10df9c..3998ac374fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c @@ -42,17 +42,30 @@ LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + +LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, unsigned member, const char *name) { - LLVMValueRef indices[2]; LLVMValueRef member_ptr; LLVMValueRef res; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h index cbefdc9f815..740392f5611 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h @@ -53,6 +53,18 @@ offsetof(_ctype, _cmember)) +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. + */ LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 912db24aecb..10c251c4162 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -44,14 +44,30 @@ struct lp_build_context; struct lp_build_mask_context; -typedef void -(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + void lp_build_tgsi_soa(LLVMBuilderRef builder, @@ -62,8 +78,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context); + struct lp_build_sampler_soa *sampler); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index d4d18febec7..3ce379de12f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -88,8 +88,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef (*outputs)[NUM_CHANNELS]; - lp_emit_fetch_texel_soa_callback emit_fetch_texel; - void *emit_fetch_texel_context; + struct lp_build_sampler_soa *sampler; LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; @@ -289,8 +288,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, coords[i] = lp_build_mul(&bld->base, coords[i], oow); } - bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context, - unit, num_coords, coords, lodbias, texel); + bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + unit, num_coords, coords, lodbias, + texel); FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { emit_store( bld, inst, 0, i, texel[i] ); @@ -1283,8 +1285,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context) + struct lp_build_sampler_soa *sampler) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1299,8 +1300,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; - bld.emit_fetch_texel = emit_fetch_texel; - bld.emit_fetch_texel_context = emit_fetch_texel_context; + bld.sampler = sampler; tgsi_parse_init( &parse, tokens ); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 8e0026fd973..577644b7ab8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -157,6 +157,17 @@ lp_build_int_vec_type(union lp_type type) } +union lp_type +lp_int_type(union lp_type type) +{ + union lp_type int_type; + int_type.value = 0; + int_type.width = type.width; + int_type.length = type.length; + return int_type; +} + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 3ce566be641..9933e0b45c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -165,6 +165,10 @@ LLVMTypeRef lp_build_int_vec_type(union lp_type type); +union lp_type +lp_int_type(union lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d288460a1b8..9465f763d50 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -44,15 +44,47 @@ static void lp_jit_init_globals(struct llvmpipe_screen *screen) { - /* struct lp_jit_context */ + LLVMTypeRef texture_type; + + /* struct lp_jit_texture */ { LLVMTypeRef elem_types[4]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + screen->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + screen->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + screen->target, texture_type, + LP_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, + screen->target, texture_type, + LP_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + screen->target, texture_type); + + LLVMAddTypeName(screen->module, "texture", texture_type); + } + + /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + screen->target, context_type, + LP_JIT_CONTEXT_TEXTURES_INDEX); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a7fb60f9f5c..c3e3e1af672 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -38,11 +38,31 @@ #include "lp_bld_struct.h" +#include "pipe/p_state.h" + struct tgsi_sampler; struct llvmpipe_screen; +struct lp_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DATA +}; + + + /** * This structure is passed directly to the generated fragment shader. * @@ -65,6 +85,8 @@ struct lp_jit_context /* TODO: blend constant color */ uint8_t *blend_color; + + struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; }; @@ -80,6 +102,11 @@ struct lp_jit_context #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 3, "blend_color") +#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 + +#define lp_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + typedef void (*lp_jit_frag_func)(struct lp_jit_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d145f6d6bbc..f9e254efcae 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_bld_debug.h" #include "lp_tile_cache.h" #include "lp_tile_soa.h" @@ -164,10 +165,12 @@ shade_quads(struct llvmpipe_context *llvmpipe, /* TODO: blend color */ - assert((((uintptr_t)mask) & 0xf) == 0); - assert((((uintptr_t)depth) & 0xf) == 0); - assert((((uintptr_t)color) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0); + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); + + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); /* run shader */ fs->current->jit_function( &llvmpipe->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index fb10329887d..7b26ce61a38 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -57,16 +58,20 @@ struct tgsi_sampler; struct vertex_info; - +struct pipe_context; +struct llvmpipe_context; struct lp_fragment_shader; struct lp_fragment_shader_variant_key { + enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6fbb057937e..e87976b9f36 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA)) + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 94170bd7161..618cf1ffb8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -85,6 +85,7 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_quad.h" +#include "lp_tex_sample.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -130,9 +131,8 @@ generate_pos0(LLVMBuilderRef builder, * Generate the depth test. */ static void -generate_depth(struct llvmpipe_context *lp, - LLVMBuilderRef builder, - const struct pipe_depth_state *state, +generate_depth(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, union lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -141,10 +141,10 @@ generate_depth(struct llvmpipe_context *lp, const struct util_format_description *format_desc; union lp_type dst_type; - if(!lp->framebuffer.zsbuf) + if(!key->depth.enabled) return; - format_desc = util_format_description(lp->framebuffer.zsbuf->format); + format_desc = util_format_description(key->zsbuf_format); assert(format_desc); /* Pick the depth type. */ @@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp, #endif lp_build_depth_test(builder, - state, + &key->depth, dst_type, format_desc, mask, @@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp, } -struct build_fetch_texel_context -{ - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -static void -emit_fetch_texel( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct build_fetch_texel_context *bld = context; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!bld->samplers_ptr) - bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr); - - if(!bld->store_ptr) - bld->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = bld->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = bld->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - /** * Generate the fragment shader, depth/stencil test, and alpha tests. */ @@ -286,7 +185,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, - struct build_fetch_texel_context *sampler, + struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, LLVMValueRef depth_ptr) @@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_test; unsigned attrib; @@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - lp_build_mask_begin(&mask, builder, type, *pmask); + flow = lp_build_flow_create(builder); + + memset(outputs, 0, sizeof outputs); + + lp_build_flow_scope_begin(flow); + + /* Declare the color and z variables */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[chan]); + } + lp_build_flow_scope_declare(flow, &z); + + lp_build_mask_begin(&mask, flow, type, *pmask); early_depth_test = - lp->depth_stencil->depth.enabled && - lp->framebuffer.zsbuf && - !lp->depth_stencil->alpha.enabled && - !lp->fs->info.uses_kill && - !lp->fs->info.writes_z; + key->depth.enabled && + !key->alpha.enabled && + !shader->info.uses_kill && + !shader->info.writes_z; if(early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); - memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, emit_fetch_texel, sampler); + outputs, sampler); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp, } if(!early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); lp_build_mask_end(&mask); + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + *pmask = mask.value; } @@ -392,6 +306,8 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef dst_ptr) { struct lp_build_context bld; + struct lp_build_flow_context *flow; + struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; @@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; + lp_build_context_init(&bld, builder, type); + + flow = lp_build_flow_create(builder); + lp_build_mask_begin(&mask_ctx, flow, type, mask); + vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); - lp_build_context_init(&bld, builder, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); @@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + if(blend->colormask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + lp_build_name(res[chan], "res.%c", "rgba"[chan]); + res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); + LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + } } + + lp_build_mask_end(&mask_ctx); + lp_build_flow_destroy(flow); } @@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; - struct build_fetch_texel_context sampler; + struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; @@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); - memset(&sampler, 0, sizeof sampler); - sampler.context_ptr = context_ptr; +#if 0 + /* C texture sampling */ + sampler = lp_c_sampler_soa_create(context_ptr); +#else + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); +#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr, i, &interp, - &sampler, + sampler, &fs_mask[i], out_color, depth_ptr_i); @@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_out_color[chan][i] = out_color[chan]; } + sampler->destroy(sampler); + /* * Convert the fs's output color and mask to fit to the blending type. */ @@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, */ static void make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, struct lp_fragment_shader_variant_key *key) { + unsigned i; + memset(key, 0, sizeof *key); - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if(lp->framebuffer.zsbuf && + lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } key->alpha.enabled = lp->depth_stencil->alpha.enabled; if(key->alpha.enabled) key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - memcpy(&key->blend, lp->blend, sizeof key->blend); + if(lp->framebuffer.cbufs[0]) { + const struct util_format_description *format_desc; + unsigned chan; + + memcpy(&key->blend, lp->blend, sizeof key->blend); + + format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + + /* mask out color channels not present in the color buffer */ + for(chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + if(swizzle > 4) + key->blend.colormask &= ~(1 << chan); + } + } + + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); } @@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - make_variant_key(lp, &key); + make_variant_key(lp, shader, &key); variant = shader->variants; while(variant) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 4fef541b1e3..c69d90c723a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference(&llvmpipe->texture[i], tex); lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + } } llvmpipe->num_textures = num; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 1d192355eed..d8455e56490 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module, lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba(builder, format, ptr); + rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); @@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba(builder, format, ptr, rgba); + lp_build_store_rgba_aos(builder, format, ptr, rgba); LLVMBuildRetVoid(builder); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 23a94b5b0d5..773e8482425 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); + debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 628ec3f1efd..9ad1bde9565 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,10 +29,13 @@ #define LP_TEX_SAMPLE_H +#include <llvm-c/Core.h> + #include "tgsi/tgsi_exec.h" struct llvmpipe_tex_tile_cache; +struct lp_sampler_static_state; /** @@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]); +/** + * Texture sampling code generator that just calls lp_get_samples C function + * for the actual sampling computation. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr); + + +/** + * Pure-LLVM texture sampling code generator. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key, + LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 94eb6dad5af..9a876f404df 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1578,3 +1578,136 @@ out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c new file mode 100644 index 00000000000..7d31705d014 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_sample.h" +#include "lp_bld_tgsi.h" +#include "lp_state.h" +#include "lp_tex_sample.h" + + +/** + * This provides the bridge between the sampler state store in lp_jit_context + * and lp_jit_texture and the sampler code generator. It provides the + * texture layout information required by the texture sampler code generator + * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name) +{ + struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + res = LLVMBuildLoad(builder, ptr, ""); + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to fetch + * the members of lp_jit_texture to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture sampler code + * generator a reusable module without dependencies to llvmpipe internals. + */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + union lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, + coords, + lodbias, + texel); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 03b9243b828..93479a0314a 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->image_nr = 1; mt->level[0].pitch = *stride; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + mt->level[0].tile_mode = bo->tile_mode; nouveau_bo_ref(bo, &mt->base.bo); return &mt->base.base; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 344c2cf6dde..d294356f75d 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1224, 1); @@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1538, 1); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b266324f58d..6bf6f773b0c 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) format = nv50_format(ps->format); if (format < 0) return 1; - + if (!bo->tile_flags) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[0].pitch); + OUT_RING (chan, mt->level[ps->level].pitch); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); OUT_RELOCh(chan, bo, ps->offset, flags); @@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, mt->level[ps->level].tile_mode << 4); OUT_RING (chan, 1); OUT_RING (chan, 0); BEGIN_RING(chan, eng2d, mthd + 0x18, 4); diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d7a2c8c462c..93c2152edce 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ + r300_debug.c \ r300_emit.c \ r300_flush.c \ r300_fs.c \ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index da67bc29b89..9cc455135db 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,6 +22,9 @@ #include "r300_context.h" +#include "r300_flush.h" +#include "r300_state_invariant.h" + static boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -146,6 +149,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)r300_winsys; r300->context.screen = r300_screen(screen); + r300_init_debug(r300); + r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f78492d4aa9..6c5914baa35 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -211,10 +211,7 @@ struct r300_vertex_format { int fs_tab[16]; }; -static struct pipe_viewport_state r300_viewport_identity = { - .scale = {1.0, 1.0, 1.0, 1.0}, - .translate = {0.0, 0.0, 0.0, 0.0}, -}; +extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { /* Parent class */ @@ -275,6 +272,9 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; /* Convenience cast wrapper. */ @@ -288,4 +288,40 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +/*@}*/ + +static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +{ + return (ctx->debug & flags) ? true : false; +} + +static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +{ + if (DBG_ON(ctx, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_context * ctx); + #endif /* R300_CONTEXT_H */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 71b142c0dbf..0a7e4703636 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -49,7 +49,8 @@ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) #define CS_LOCALS(context) \ - struct r300_winsys* cs_winsys = context->winsys; \ + struct r300_context* const cs_context_copy = (context); \ + struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ int cs_count = 0; #define CHECK_CS(size) \ @@ -58,7 +59,7 @@ #define BEGIN_CS(size) do { \ CHECK_CS(size); \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ + DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ size, __FUNCTION__, __FILE__, __LINE__); \ } \ cs_winsys->begin_cs(cs_winsys, (size), \ @@ -78,7 +79,7 @@ #define OUT_CS_REG(register, value) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing 0x%08X to register 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ value, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, 0)); \ @@ -89,14 +90,14 @@ * not the actual packet0 count! */ #define OUT_CS_REG_SEQ(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1))); \ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for buffer %p, offset %d, " \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ @@ -107,7 +108,7 @@ #define END_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ if (cs_count != 0) \ @@ -117,7 +118,7 @@ #define FLUSH_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ cs_winsys->flush_cs(cs_winsys); \ @@ -127,7 +128,7 @@ #define OUT_CS_ONE_REG(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ @@ -141,7 +142,7 @@ } while (0) #define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for index buffer %p," \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ "offset %d\n", bo, offset); \ assert(bo); \ OUT_CS(offset); \ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c new file mode 100644 index 00000000000..15308dda1de --- /dev/null +++ b/src/gallium/drivers/r300/r300_debug.c @@ -0,0 +1,88 @@ +/* + * Copyright 2009 Nicolai Haehnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_context.h" + +#include <ctype.h> + + +struct debug_option { + const char * name; + unsigned flag; + const char * description; +}; + +static struct debug_option debug_options[] = { + { "help", DBG_HELP, "Helpful meta-information about the driver" }, + { "fp", DBG_FP, "Fragment program handling" }, + { "vp", DBG_VP, "Vertex program handling" }, + { "cs", DBG_CS, "Command submissions" }, + { "draw", DBG_DRAW, "Draw and emit" }, + + { "all", ~0, "Convenience option that enables all debug flags" }, + + /* must be last */ + { 0, 0, 0 } +}; + +void r300_init_debug(struct r300_context * ctx) +{ + const char * options = debug_get_option("RADEON_DEBUG", 0); + boolean printhint = false; + + if (options) { + while(*options) { + if (*options == ' ' || *options == ',') { + options++; + continue; + } + + size_t length = strcspn(options, " ,"); + struct debug_option * opt; + + for(opt = debug_options; opt->name; ++opt) { + if (!strncmp(options, opt->name, length)) { + ctx->debug |= opt->flag; + break; + } + } + + if (!opt->name) { + debug_printf("Unknown debug option: %s\n", options); + printhint = true; + } + + options += length; + } + + if (!ctx->debug) + printhint = true; + } + + if (printhint || ctx->debug & DBG_HELP) { + debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" + "to a comma-separated list of debug options. Available options are:\n"); + for(struct debug_option * opt = debug_options; opt->name; ++opt) { + debug_printf(" %s: %s\n", opt->name, opt->description); + } + } +} diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index bd4d59e6f1a..1bc35c24867 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "r300_emit.h" #include "r300_fs.h" +#include "r300_state_derived.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, @@ -490,7 +491,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) { CS_LOCALS(r300); - debug_printf("r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.vinfo.size); /* Set the pointer to our vertex buffer. The emitted values are this: diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 350691d592d..c4002b8e5d0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -56,6 +56,11 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); +void r300_emit_query_begin(struct r300_context* r300, + struct r300_query* query); +void r300_emit_query_end(struct r300_context* r300, + struct r300_query* query); + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 36463b9a2eb..a0e848a59ac 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -96,7 +96,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_FP); compiler.code = &fs->code; compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 1d5185b417e..2880d34877f 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -22,6 +22,8 @@ #include "r300_query.h" +#include "r300_emit.h" + static struct pipe_query* r300_create_query(struct pipe_context* pipe, unsigned query_type) { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cd458d019ae..d05a736dd96 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,7 @@ #include "r300_cs.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_reg.h" #include "r300_state_derived.h" @@ -34,7 +35,7 @@ struct r300_render { /* Parent class */ struct vbuf_render base; - + /* Pipe context */ struct r300_context* r300; @@ -77,7 +78,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, if (r300render->vbo && (size > r300render->vbo_alloc_size)) { pipe_buffer_reference(&r300render->vbo, NULL); } - + if (!r300render->vbo) { r300render->vbo = pipe_buffer_create(screen, 64, @@ -184,7 +185,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, prepare_render(r300render, count); - debug_printf("r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); @@ -233,7 +234,8 @@ static void r300_render_draw(struct vbuf_render* render, OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; */ - BEGIN_CS(2 + (count+1)/2); + BEGIN_CS(4 + (count+1)/2); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index c16cadd0407..88cb9af6fb7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -20,10 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_pack_color.h" -#include "util/u_debug.h" +#include "tgsi/tgsi_parse.h" #include "pipe/p_config.h" #include "pipe/internal/p_winsys_screen.h" @@ -429,6 +430,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->rs_state = rs; r300->dirty_state |= R300_NEW_RASTERIZER; + r300->dirty_state |= R300_NEW_RS_BLOCK; + r300->dirty_state |= R300_NEW_SCISSOR; + r300->dirty_state |= R300_NEW_VIEWPORT; } /* Free rasterizer state. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c01e61a9b19..5f6b225d340 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -23,6 +23,7 @@ #include "r300_state_derived.h" #include "r300_fs.h" +#include "r300_state_inlines.h" #include "r300_vs.h" /* r300_state_derived: Various bits of state which are dependent upon @@ -195,13 +196,13 @@ static void r300_vertex_psc(struct r300_context* r300, * and not on attrib information. */ if (r300screen->caps->has_tcl) { attrib_count = r300->vs->info.num_inputs; - debug_printf("r300: routing %d attribs in psc for vs\n", + DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", attrib_count); } else { attrib_count = vinfo->num_attribs; - debug_printf("r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - debug_printf("r300: attrib: offset %d, interp %d, size %d," + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," " tab %d\n", vinfo->attrib[i].src_index, vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, tab[i]); @@ -299,18 +300,18 @@ static void r300_update_fs_tab(struct r300_context* r300) } /* Now that we know where everything is... */ - debug_printf("r300: fp input count: %d\n", info->num_inputs); + DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); for (i = 0; i < info->num_inputs; i++) { switch (tab[i]) { case INTERP_LINEAR: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, color, tab %d\n", i, cols_emitted); tab[i] = cols_emitted; cols_emitted++; break; case INTERP_PERSPECTIVE: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, texcoord, tab %d\n", i, cols + texs); tab[i] = cols + texs; diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h index 63ae8eb8d08..71a4a47b003 100644 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -23,11 +23,7 @@ #ifndef R300_STATE_DERIVED_H #define R300_STATE_DERIVED_H -#include "draw/draw_vertex.h" - -#include "r300_context.h" -#include "r300_reg.h" -#include "r300_state_inlines.h" +struct r300_context; void r300_update_derived_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 7d822fec483..3865730d635 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -23,6 +23,12 @@ #include "r300_state_invariant.h" + +struct pipe_viewport_state r300_viewport_identity = { + .scale = {1.0, 1.0, 1.0, 1.0}, + .translate = {0.0, 0.0, 0.0, 0.0}, +}; + /* Calculate and emit invariant state. This is data that the 3D engine * will probably want at the beginning of every CS, but it's not currently * handled by any CSO setup, and in addition it doesn't really change much. diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 2cb903bba2f..12a6e37be62 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -116,7 +116,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Setup the compiler */ rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_VP); compiler.code = &vs->code; compiler.UserData = vs; diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index c708ac31702..15c955450d0 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -4,6 +4,7 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" +#include "util/u_math.h" #include "pipe/p_inlines.h" @@ -40,9 +41,25 @@ static const struct xorg_composite_blend xorg_blends[] = { PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_INV_SRC_ALPHA }, }; + static INLINE void -pixel_to_float4(PictFormatPtr format, - CARD32 pixel, float *color) +pixel_to_float4(Pixel pixel, float *color) +{ + CARD32 r, g, b, a; + + a = (pixel >> 24) & 0xff; + r = (pixel >> 16) & 0xff; + g = (pixel >> 8) & 0xff; + b = (pixel >> 0) & 0xff; + color[0] = ((float)r) / 255.; + color[1] = ((float)g) / 255.; + color[2] = ((float)b) / 255.; + color[3] = ((float)a) / 255.; +} + +static INLINE void +render_pixel_to_float4(PictFormatPtr format, + CARD32 pixel, float *color) { CARD32 r, g, b, a; @@ -148,7 +165,7 @@ setup_vertex_data0(struct exa_context *ctx, setup_vertex0(vertices[3], dstX, dstY + height, ctx->solid_color); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } @@ -194,12 +211,39 @@ setup_vertex_data1(struct exa_context *ctx, setup_vertex1(vertices[3], dstX, dstY + height, s0, t1); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, + vertices, + sizeof(vertices)); +} + +static struct pipe_buffer * +setup_vertex_data_tex(struct exa_context *ctx, + float x0, float y0, float x1, float y1, + float s0, float t0, float s1, float t1, + float z) +{ + float vertices[4][2][4]; + + /* 1st vertex */ + setup_vertex1(vertices[0], x0, y0, + s0, t0); + /* 2nd vertex */ + setup_vertex1(vertices[1], x1, y0, + s1, t0); + /* 3rd vertex */ + setup_vertex1(vertices[2], x1, y1, + s1, t1); + /* 4th vertex */ + setup_vertex1(vertices[3], x0, y1, + s0, t1); + + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } + static INLINE void setup_vertex2(float vertex[3][4], float x, float y, float s0, float t0, float s1, float t1) @@ -253,7 +297,7 @@ setup_vertex_data2(struct exa_context *ctx, setup_vertex2(vertices[3], dstX, dstY + height, st0[0], st0[3], st1[0], st1[3]); - return pipe_user_buffer_create(ctx->ctx->screen, + return pipe_user_buffer_create(ctx->pipe->screen, vertices, sizeof(vertices)); } @@ -291,16 +335,20 @@ boolean xorg_composite_accelerated(int op, } static void -bind_framebuffer_state(struct exa_context *exa, PicturePtr pDstPicture, - struct exa_pixmap_priv *pDst) +bind_clip_state(struct exa_context *exa) +{ +} + +static void +bind_framebuffer_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { unsigned i; struct pipe_framebuffer_state state; struct pipe_surface *surface = exa_gpu_surface(exa, pDst); memset(&state, 0, sizeof(struct pipe_framebuffer_state)); - state.width = pDstPicture->pDrawable->width; - state.height = pDstPicture->pDrawable->height; + state.width = pDst->tex->width[0]; + state.height = pDst->tex->height[0]; state.nr_cbufs = 1; state.cbufs[0] = surface; @@ -338,10 +386,12 @@ set_viewport(struct exa_context *exa, int width, int height, } static void -bind_viewport_state(struct exa_context *exa, PicturePtr pDstPicture) +bind_viewport_state(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; + + debug_printf("Bind viewport (%d, %d)\n", width, height); set_viewport(exa, width, height, Y0_TOP); } @@ -350,7 +400,8 @@ static void bind_blend_state(struct exa_context *exa, int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture) { - boolean component_alpha = pSrcPicture->componentAlpha; + boolean component_alpha = (pSrcPicture) ? + pSrcPicture->componentAlpha : FALSE; struct xorg_composite_blend blend_opt; struct pipe_blend_state blend; @@ -390,14 +441,17 @@ bind_shaders(struct exa_context *exa, int op, unsigned vs_traits = 0, fs_traits = 0; struct xorg_shader shader; + exa->has_solid_color = FALSE; + if (pSrcPicture) { if (pSrcPicture->pSourcePict) { if (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill) { fs_traits |= FS_SOLID_FILL; vs_traits |= VS_SOLID_FILL; - pixel_to_float4(pSrcPicture->pFormat, - pSrcPicture->pSourcePict->solidFill.color, - exa->solid_color); + render_pixel_to_float4(pSrcPicture->pFormat, + pSrcPicture->pSourcePict->solidFill.color, + exa->solid_color); + exa->has_solid_color = TRUE; } else { debug_assert("!gradients not supported"); } @@ -478,15 +532,15 @@ setup_vs_constant_buffer(struct exa_context *exa, struct pipe_constant_buffer *cbuf = &exa->vs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, vs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_VERTEX, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_VERTEX, 0, cbuf); } @@ -500,22 +554,22 @@ setup_fs_constant_buffer(struct exa_context *exa) struct pipe_constant_buffer *cbuf = &exa->fs_const_buffer; pipe_buffer_reference(&cbuf->buffer, NULL); - cbuf->buffer = pipe_buffer_create(exa->ctx->screen, 16, + cbuf->buffer = pipe_buffer_create(exa->pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, param_bytes); if (cbuf->buffer) { - pipe_buffer_write(exa->ctx->screen, cbuf->buffer, + pipe_buffer_write(exa->pipe->screen, cbuf->buffer, 0, param_bytes, fs_consts); } - exa->ctx->set_constant_buffer(exa->ctx, PIPE_SHADER_FRAGMENT, 0, cbuf); + exa->pipe->set_constant_buffer(exa->pipe, PIPE_SHADER_FRAGMENT, 0, cbuf); } static void -setup_constant_buffers(struct exa_context *exa, PicturePtr pDstPicture) +setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDstPicture->pDrawable->width; - int height = pDstPicture->pDrawable->height; + int width = pDst->tex->width[0]; + int height = pDst->tex->height[0]; setup_vs_constant_buffer(exa, width, height); setup_fs_constant_buffer(exa); @@ -530,15 +584,15 @@ boolean xorg_composite_bind_state(struct exa_context *exa, struct exa_pixmap_priv *pMask, struct exa_pixmap_priv *pDst) { - bind_framebuffer_state(exa, pDstPicture, pDst); - bind_viewport_state(exa, pDstPicture); + bind_framebuffer_state(exa, pDst); + bind_viewport_state(exa, pDst); bind_blend_state(exa, op, pSrcPicture, pMaskPicture); bind_rasterizer_state(exa); bind_shaders(exa, op, pSrcPicture, pMaskPicture); bind_samplers(exa, op, pSrcPicture, pMaskPicture, pDstPicture, pSrc, pMask, pDst); - - setup_constant_buffers(exa, pDstPicture); + bind_clip_state(exa); + setup_constant_buffers(exa, pDst); return FALSE; } @@ -548,7 +602,7 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height) { - struct pipe_context *pipe = exa->ctx; + struct pipe_context *pipe = exa->pipe; struct pipe_buffer *buf = 0; if (exa->num_bound_samplers == 0 ) { /* solid fill */ @@ -573,12 +627,393 @@ void xorg_composite(struct exa_context *exa, } if (buf) { + int num_attribs = 1; /*pos*/ + num_attribs += exa->num_bound_samplers; + if (exa->has_solid_color) + ++num_attribs; + util_draw_vertex_buffer(pipe, buf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ - 1 + exa->num_bound_samplers); /* attribs/vert */ + num_attribs); /* attribs/vert */ pipe_buffer_reference(&buf, NULL); } } +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg) +{ + unsigned vs_traits, fs_traits; + struct xorg_shader shader; + + pixel_to_float4(fg, exa->solid_color); + exa->has_solid_color = TRUE; + + exa->solid_color[3] = 1.f; + + debug_printf("Color Pixel=(%d, %d, %d, %d), RGBA=(%f, %f, %f, %f)\n", + (fg >> 24) & 0xff, (fg >> 16) & 0xff, + (fg >> 8) & 0xff, (fg >> 0) & 0xff, + exa->solid_color[0], exa->solid_color[1], + exa->solid_color[2], exa->solid_color[3]); + +#if 0 + exa->solid_color[0] = 1.f; + exa->solid_color[1] = 0.f; + exa->solid_color[2] = 0.f; + exa->solid_color[3] = 1.f; +#endif + + vs_traits = VS_SOLID_FILL; + fs_traits = FS_SOLID_FILL; + + bind_framebuffer_state(exa, pixmap); + bind_viewport_state(exa, pixmap); + bind_rasterizer_state(exa); + bind_blend_state(exa, PictOpSrc, NULL, NULL); + setup_constant_buffers(exa, pixmap); + bind_clip_state(exa); + + shader = xorg_shaders_get(exa->shaders, vs_traits, fs_traits); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + return FALSE; +} + +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_buffer *buf = 0; + float vertices[4][2][4]; + + x0 = 10; y0 = 10; + x1 = 300; y1 = 300; + + /* 1st vertex */ + setup_vertex0(vertices[0], x0, y0, + exa->solid_color); + /* 2nd vertex */ + setup_vertex0(vertices[1], x1, y0, + exa->solid_color); + /* 3rd vertex */ + setup_vertex0(vertices[2], x1, y1, + exa->solid_color); + /* 4th vertex */ + setup_vertex0(vertices[3], x0, y1, + exa->solid_color); + + buf = pipe_user_buffer_create(exa->pipe->screen, + vertices, + sizeof(vertices)); + + + if (buf) { + debug_printf("Drawing buf is %p\n", buf); + util_draw_vertex_buffer(pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } +} + + +static INLINE void shift_rectx(float coords[4], + const float *bounds, + const float shift) +{ + coords[0] += shift; + coords[2] -= shift; + if (bounds) { + coords[2] = MIN2(coords[2], bounds[2]); + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + } +} + +static INLINE void shift_recty(float coords[4], + const float *bounds, + const float shift) +{ + coords[1] += shift; + coords[3] -= shift; + if (bounds) { + coords[3] = MIN2(coords[3], bounds[3]); + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + } +} + +static INLINE void bound_rect(float coords[4], + const float bounds[4], + float shift[4]) +{ + /* if outside the bounds */ + if (coords[0] > (bounds[0] + bounds[2]) || + coords[1] > (bounds[1] + bounds[3]) || + (coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + shift[0] = 0.f; + shift[1] = 0.f; + return; + } + + /* bound x */ + if (coords[0] < bounds[0]) { + shift[0] = bounds[0] - coords[0]; + coords[2] -= shift[0]; + coords[0] = bounds[0]; + } else + shift[0] = 0.f; + + /* bound y */ + if (coords[1] < bounds[1]) { + shift[1] = bounds[1] - coords[1]; + coords[3] -= shift[1]; + coords[1] = bounds[1]; + } else + shift[1] = 0.f; + + shift[2] = bounds[2] - coords[2]; + shift[3] = bounds[3] - coords[3]; + /* bound width/height */ + coords[2] = MIN2(coords[2], bounds[2]); + coords[3] = MIN2(coords[3], bounds[3]); + + /* bound x/y + width/height */ + if ((coords[0] + coords[2]) > (bounds[0] + bounds[2])) { + coords[2] = (bounds[0] + bounds[2]) - coords[0]; + } + if ((coords[1] + coords[3]) > (bounds[1] + bounds[3])) { + coords[3] = (bounds[1] + bounds[3]) - coords[1]; + } + + /* if outside the bounds */ + if ((coords[0] + coords[2]) < bounds[0] || + (coords[1] + coords[3]) < bounds[1]) { + coords[0] = 0.f; + coords[1] = 0.f; + coords[2] = 0.f; + coords[3] = 0.f; + return; + } +} + +static INLINE void sync_size(float *src_loc, float *dst_loc) +{ + src_loc[2] = MIN2(src_loc[2], dst_loc[2]); + src_loc[3] = MIN2(src_loc[3], dst_loc[3]); + dst_loc[2] = src_loc[2]; + dst_loc[3] = src_loc[3]; +} + + +static void renderer_copy_texture(struct exa_context *exa, + struct pipe_texture *src, + float sx1, float sy1, + float sx2, float sy2, + struct pipe_texture *dst, + float dx1, float dy1, + float dx2, float dy2) +{ + struct pipe_context *pipe = exa->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_buffer *buf; + struct pipe_surface *dst_surf = screen->get_tex_surface( + screen, dst, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + struct pipe_framebuffer_state fb; + float s0, t0, s1, t1; + struct xorg_shader shader; + + assert(src->width[0] != 0); + assert(src->height[0] != 0); + assert(dst->width[0] != 0); + assert(dst->height[0] != 0); + +#if 0 + debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", + sx1, sy1, sx2, sy2, dx1, dy1, dx2, dy2); +#endif + +#if 1 + s0 = sx1 / src->width[0]; + s1 = sx2 / src->width[0]; + t0 = sy1 / src->height[0]; + t1 = sy2 / src->height[0]; +#else + s0 = 0; + s1 = 1; + t0 = 0; + t1 = 1; +#endif + + assert(screen->is_format_supported(screen, dst_surf->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + + /* save state (restored below) */ + cso_save_blend(exa->cso); + cso_save_samplers(exa->cso); + cso_save_sampler_textures(exa->cso); + cso_save_framebuffer(exa->cso); + cso_save_fragment_shader(exa->cso); + cso_save_vertex_shader(exa->cso); + + cso_save_viewport(exa->cso); + + + /* set misc state we care about */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + cso_set_blend(exa->cso, &blend); + } + + /* sampler */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + cso_single_sampler(exa->cso, 0, &sampler); + cso_single_sampler_done(exa->cso); + } + + set_viewport(exa, dst_surf->width, dst_surf->height, Y0_TOP); + + /* texture */ + cso_set_sampler_textures(exa->cso, 1, &src); + + /* shaders */ + shader = xorg_shaders_get(exa->shaders, + VS_COMPOSITE, + FS_COMPOSITE); + cso_set_vertex_shader_handle(exa->cso, shader.vs); + cso_set_fragment_shader_handle(exa->cso, shader.fs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst_surf->width; + fb.height = dst_surf->height; + fb.nr_cbufs = 1; + fb.cbufs[0] = dst_surf; + { + int i; + for (i = 1; i < PIPE_MAX_COLOR_BUFS; ++i) + fb.cbufs[i] = 0; + } + cso_set_framebuffer(exa->cso, &fb); + + /* draw quad */ + buf = setup_vertex_data_tex(exa, + dx1, dy1, + dx2, dy2, + s0, t0, s1, t1, + 0.0f); + + if (buf) { + util_draw_vertex_buffer(exa->pipe, buf, 0, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + pipe_buffer_reference(&buf, NULL); + } + + /* restore state we changed */ + cso_restore_blend(exa->cso); + cso_restore_samplers(exa->cso); + cso_restore_sampler_textures(exa->cso); + cso_restore_framebuffer(exa->cso); + cso_restore_vertex_shader(exa->cso); + cso_restore_fragment_shader(exa->cso); + cso_restore_viewport(exa->cso); + + pipe_surface_reference(&dst_surf, NULL); +} + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst_priv, int dx, int dy, + struct exa_pixmap_priv *src_priv, int sx, int sy, + int width, int height) +{ + float dst_loc[4], src_loc[4]; + float dst_bounds[4], src_bounds[4]; + float src_shift[4], dst_shift[4], shift[4]; + struct pipe_texture *dst = dst_priv->tex; + struct pipe_texture *src = src_priv->tex; + + dst_loc[0] = dx; + dst_loc[1] = dy; + dst_loc[2] = width; + dst_loc[3] = height; + dst_bounds[0] = 0.f; + dst_bounds[1] = 0.f; + dst_bounds[2] = dst->width[0]; + dst_bounds[3] = dst->height[0]; + + src_loc[0] = sx; + src_loc[1] = sy; + src_loc[2] = width; + src_loc[3] = height; + src_bounds[0] = 0.f; + src_bounds[1] = 0.f; + src_bounds[2] = src->width[0]; + src_bounds[3] = src->height[0]; + + bound_rect(src_loc, src_bounds, src_shift); + bound_rect(dst_loc, dst_bounds, dst_shift); + shift[0] = src_shift[0] - dst_shift[0]; + shift[1] = src_shift[1] - dst_shift[1]; + + if (shift[0] < 0) + shift_rectx(src_loc, src_bounds, -shift[0]); + else + shift_rectx(dst_loc, dst_bounds, shift[0]); + + if (shift[1] < 0) + shift_recty(src_loc, src_bounds, -shift[1]); + else + shift_recty(dst_loc, dst_bounds, shift[1]); + + sync_size(src_loc, dst_loc); + + if (src_loc[2] >= 0 && src_loc[3] >= 0 && + dst_loc[2] >= 0 && dst_loc[3] >= 0) { + renderer_copy_texture(ctx, + src, + src_loc[0], + src_loc[1] + src_loc[3], + src_loc[0] + src_loc[2], + src_loc[1], + dst, + dst_loc[0], + dst_loc[1] + dst_loc[3], + dst_loc[0] + dst_loc[2], + dst_loc[1]); + } +} + diff --git a/src/gallium/state_trackers/xorg/xorg_composite.h b/src/gallium/state_trackers/xorg/xorg_composite.h index 17dfcb199ea..e73f1c704a8 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.h +++ b/src/gallium/state_trackers/xorg/xorg_composite.h @@ -22,4 +22,16 @@ void xorg_composite(struct exa_context *exa, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int width, int height); +boolean xorg_solid_bind_state(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + Pixel fg); +void xorg_solid(struct exa_context *exa, + struct exa_pixmap_priv *pixmap, + int x0, int y0, int x1, int y1); + +void xorg_copy_pixmap(struct exa_context *ctx, + struct exa_pixmap_priv *dst, int dx, int dy, + struct exa_pixmap_priv *src, int sx, int sy, + int width, int height); + #endif diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 6431a0fe254..8a362596c75 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -118,6 +118,7 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format) } if (!tex) { + exaMoveInPixmap(private->pPixmap); xorg_exa_set_shared_usage(private->pPixmap); pScreen->ModifyPixmapHeader(private->pPixmap, 0, 0, 0, 0, 0, NULL); tex = xorg_exa_get_texture(private->pPixmap); diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index a17a71f23a8..312dab1544c 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -82,6 +82,25 @@ exa_get_pipe_format(int depth, enum pipe_format *format, int *bbp) } } +static void +xorg_exa_init_state(struct exa_context *exa) +{ + struct pipe_depth_stencil_alpha_state dsa; + + /* set common initial clip state */ + memset(&dsa, 0, sizeof(struct pipe_depth_stencil_alpha_state)); + cso_set_depth_stencil_alpha(exa->cso, &dsa); +} + +static void +xorg_exa_common_done(struct exa_context *exa) +{ + exa->copy.src = NULL; + exa->copy.dst = NULL; + exa->has_solid_color = FALSE; + exa->num_bound_samplers = 0; +} + /* * Static exported EXA functions */ @@ -111,9 +130,9 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, if (!priv || !priv->tex) return FALSE; - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, PIPE_TRANSFER_READ, x, y, w, h); @@ -178,9 +197,9 @@ ExaPrepareAccess(PixmapPtr pPix, int index) if (priv->map_count++ == 0) { - if (exa->ctx->is_texture_referenced(exa->ctx, priv->tex, 0, 0) & + if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & PIPE_REFERENCED_FOR_WRITE) - exa->ctx->flush(exa->ctx, 0, NULL); + exa->pipe->flush(exa->pipe, 0, NULL); priv->map_transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, @@ -231,15 +250,22 @@ ExaDone(PixmapPtr pPixmap) if (!priv) return; - if (priv->src_surf) - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; +#if 1 + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL); +#else + xorg_finish(exa); +#endif + xorg_exa_common_done(exa); } static void ExaDoneComposite(PixmapPtr pPixmap) { + ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + xorg_exa_common_done(exa); } static Bool @@ -250,6 +276,7 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareSolid - test\n"); if (pPixmap->drawable.depth < 15) return FALSE; @@ -262,12 +289,11 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg) if (alu != GXcopy) return FALSE; - if (!exa->ctx || !exa->ctx->surface_fill) + if (!exa->pipe) return FALSE; - priv->color = fg; - - return TRUE; + debug_printf(" ExaPrepareSolid(0x%x)\n", fg); + return xorg_solid_bind_state(exa, priv, fg); } static void @@ -277,12 +303,18 @@ ExaSolid(PixmapPtr pPixmap, int x0, int y0, int x1, int y1) modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_fill(exa->ctx, surf, x0, y0, x1 - x0, y1 - y0, - priv->color); + debug_printf("\tExaSolid(%d, %d, %d, %d)\n", x0, y0, x1, y1); - exa->scrn->tex_surface_destroy(surf); +#if 0 + if (x0 == 0 && y0 == 0 && + x1 == priv->tex->width[0] && + y1 == priv->tex->height[0]) { + exa->ctx->clear(exa->ctx, PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, + exa->solid_color, 1., 0); + } else +#endif + xorg_solid(exa, priv, x0, y0, x1, y1) ; } static Bool @@ -295,6 +327,8 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); struct exa_pixmap_priv *src_priv = exaGetPixmapDriverPrivate(pSrcPixmap); + debug_printf("ExaPrepareCopy\n"); + if (alu != GXcopy) return FALSE; @@ -310,27 +344,33 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, if (!priv->tex || !src_priv->tex) return FALSE; - if (!exa->ctx || !exa->ctx->surface_copy) + if (!exa->pipe) return FALSE; - priv->src_surf = exa_gpu_surface(exa, src_priv); + exa->copy.src = src_priv; + exa->copy.dst = priv; - return TRUE; + /*XXX disabled until some issues with syncing are fixed */ + return FALSE; } static void ExaCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int width, int height) { - ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - struct exa_context *exa = ms->exa; - struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); - struct pipe_surface *surf = exa_gpu_surface(exa, priv); + ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; + modesettingPtr ms = modesettingPTR(pScrn); + struct exa_context *exa = ms->exa; + struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDstPixmap); + + debug_printf("\tExaCopy(srcx=%d, srcy=%d, dstX=%d, dstY=%d, w=%d, h=%d)\n", + srcX, srcY, dstX, dstY, width, height); - exa->ctx->surface_copy(exa->ctx, surf, dstX, dstY, priv->src_surf, - srcX, srcY, width, height); - exa->scrn->tex_surface_destroy(surf); + debug_assert(priv == exa->copy.dst); + + xorg_copy_pixmap(exa, exa->copy.dst, dstX, dstY, + exa->copy.src, srcX, srcY, + width, height); } static Bool @@ -342,6 +382,8 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture, modesettingPtr ms = modesettingPTR(pScrn); struct exa_context *exa = ms->exa; + debug_printf("ExaPrepareComposite\n"); + return xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture, pDstPicture, exaGetPixmapDriverPrivate(pSrc), @@ -358,6 +400,8 @@ ExaComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, struct exa_context *exa = ms->exa; struct exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pDst); + debug_printf("\tExaComposite\n"); + xorg_composite(exa, priv, srcX, srcY, maskX, maskY, dstX, dstY, width, height); } @@ -537,16 +581,16 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, if (priv->tex) { struct pipe_surface *dst_surf; + struct pipe_surface *src_surf; dst_surf = exa->scrn->get_tex_surface(exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE); - priv->src_surf = exa_gpu_surface(exa, priv); - exa->ctx->surface_copy(exa->ctx, dst_surf, 0, 0, priv->src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + src_surf = exa_gpu_surface(exa, priv); + exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, + 0, 0, min(width, texture->width[0]), + min(height, texture->height[0])); exa->scrn->tex_surface_destroy(dst_surf); - exa->scrn->tex_surface_destroy(priv->src_surf); - priv->src_surf = NULL; + exa->scrn->tex_surface_destroy(src_surf); } else if (pPixmap->devPrivate.ptr) { struct pipe_transfer *transfer; @@ -563,6 +607,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, pPixmap->devKind, 0, 0); exa->scrn->transfer_unmap(exa->scrn, transfer); exa->scrn->tex_transfer_destroy(transfer); + + xfree(pPixmap->devPrivate.ptr); + pPixmap->devPrivate.ptr = NULL; } } #ifdef DRM_MODE_FEATURE_DIRTYFB @@ -611,8 +658,8 @@ xorg_exa_close(ScrnInfoPtr pScrn) cso_destroy_context(exa->cso); } - if (exa->ctx) - exa->ctx->destroy(exa->ctx); + if (exa->pipe) + exa->pipe->destroy(exa->pipe); exaDriverFini(pScrn->pScreen); xfree(exa); @@ -645,6 +692,12 @@ xorg_exa_init(ScrnInfoPtr pScrn) pExa->pixmapOffsetAlign = 0; pExa->pixmapPitchAlign = 1; pExa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_HANDLES_PIXMAPS; +#ifdef EXA_SUPPORTS_PREPARE_AUX + pExa->flags |= EXA_SUPPORTS_PREPARE_AUX; +#endif +#ifdef EXA_MIXED_PIXMAPS + pExa->flags |= EXA_MIXED_PIXMAPS; +#endif pExa->maxX = 8191; /* FIXME */ pExa->maxY = 8191; /* FIXME */ @@ -674,13 +727,15 @@ xorg_exa_init(ScrnInfoPtr pScrn) } exa->scrn = ms->screen; - exa->ctx = ms->api->create_context(ms->api, exa->scrn); + exa->pipe = ms->api->create_context(ms->api, exa->scrn); /* Share context with DRI */ - ms->ctx = exa->ctx; + ms->ctx = exa->pipe; - exa->cso = cso_create_context(exa->ctx); + exa->cso = cso_create_context(exa->pipe); exa->shaders = xorg_shaders_create(exa); + xorg_exa_init_state(exa); + return (void *)exa; out_err: @@ -698,3 +753,19 @@ exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv) } +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence) +{ + exa->pipe->flush(exa->pipe, pipeFlushFlags, fence); +} + +void xorg_exa_finish(struct exa_context *exa) +{ + struct pipe_fence_handle *fence = NULL; + + xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, &fence); + + exa->pipe->screen->fence_finish(exa->pipe->screen, fence, 0); + exa->pipe->screen->fence_reference(exa->pipe->screen, &fence, NULL); +} + diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h index 5b515be1397..43949b04a44 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.h +++ b/src/gallium/state_trackers/xorg/xorg_exa.h @@ -14,7 +14,7 @@ struct xorg_shaders; struct exa_context { ExaDriverPtr pExa; - struct pipe_context *ctx; + struct pipe_context *pipe; struct pipe_screen *scrn; struct cso_context *cso; struct xorg_shaders *shaders; @@ -26,6 +26,12 @@ struct exa_context int num_bound_samplers; float solid_color[4]; + boolean has_solid_color; + + struct { + struct exa_pixmap_priv *src; + struct exa_pixmap_priv *dst; + } copy; }; @@ -36,8 +42,6 @@ struct exa_pixmap_priv struct pipe_texture *tex; struct pipe_texture *depth_stencil_tex; - unsigned int color; - struct pipe_surface *src_surf; /* for copies */ struct pipe_transfer *map_transfer; unsigned map_count; @@ -46,5 +50,8 @@ struct exa_pixmap_priv struct pipe_surface * exa_gpu_surface(struct exa_context *exa, struct exa_pixmap_priv *priv); +void xorg_exa_flush(struct exa_context *exa, uint pipeFlushFlags, + struct pipe_fence_handle **fence); +void xorg_exa_finish(struct exa_context *exa); #endif diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c index cfee10c3b30..694eded09a2 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c +++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c @@ -268,8 +268,8 @@ create_vs(struct pipe_context *pipe, } if (is_fill) { src = ureg_DECL_vs_input(ureg, - TGSI_SEMANTIC_COLOR, 1); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1); + TGSI_SEMANTIC_COLOR, 0); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(ureg, dst, src); } @@ -318,8 +318,8 @@ create_fs(struct pipe_context *pipe, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_PERSPECTIVE); - } - if (is_fill) { + } else { + debug_assert(is_fill); if (is_solid) src_input = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, @@ -473,9 +473,9 @@ struct xorg_shader xorg_shaders_get(struct xorg_shaders *sc, struct xorg_shader shader = {0}; void *vs, *fs; - vs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_VERTEX, + vs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_VERTEX, sc->vs_hash, vs_traits); - fs = shader_from_cache(sc->exa->ctx, PIPE_SHADER_FRAGMENT, + fs = shader_from_cache(sc->exa->pipe, PIPE_SHADER_FRAGMENT, sc->fs_hash, fs_traits); debug_assert(vs && fs); diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index 6c00861f517..f9738110723 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -12,8 +12,9 @@ drivers = [ trace, ] -env.SharedLibrary( +env.LoadableModule( target ='i915_dri.so', source = COMMON_GALLIUM_SOURCES, LIBS = drivers + mesa + auxiliaries + env['LIBS'], + SHLIBPREFIX = '', ) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c index 5ca3ad9762d..ebd1b607b78 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_batchbuffer.c @@ -13,6 +13,7 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #undef INTEL_RUN_SYNC +#undef INTEL_MAP_BATCHBUFFER struct intel_drm_batchbuffer { @@ -40,8 +41,11 @@ intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch) "gallium3d_batchbuffer", batch->actual_size, 4096); + +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_map(batch->bo, TRUE); batch->base.map = batch->bo->virtual; +#endif memset(batch->base.map, 0, batch->actual_size); batch->base.ptr = batch->base.map; @@ -55,7 +59,13 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws) struct intel_drm_winsys *idws = intel_drm_winsys(iws); struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer); + batch->actual_size = idws->max_batch_size; + +#ifdef INTEL_MAP_BATCHBUFFER batch->base.map = NULL; +#else + batch->base.map = MALLOC(batch->actual_size); +#endif batch->base.ptr = NULL; batch->base.size = 0; @@ -64,8 +74,6 @@ intel_drm_batchbuffer_create(struct intel_winsys *iws) batch->base.iws = iws; - batch->actual_size = idws->max_batch_size; - intel_drm_batchbuffer_reset(batch); return &batch->base; @@ -156,7 +164,11 @@ intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch, used = batch->base.ptr - batch->base.map; +#ifdef INTEL_MAP_BATCHBUFFER drm_intel_bo_unmap(batch->bo); +#else + drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); +#endif /* Do the sending to HW */ ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); @@ -202,7 +214,10 @@ intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch) if (batch->bo) drm_intel_bo_unreference(batch->bo); - free(batch); +#ifndef INTEL_MAP_BATCHBUFFER + FREE(batch->base.map); +#endif + FREE(batch); } void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws) diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c index e017cd2e982..0030f915a36 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c @@ -28,6 +28,7 @@ intel_drm_buffer_create(struct intel_winsys *iws, } else if (type == INTEL_NEW_VERTEX) { name = "gallium3d_vertex"; pool = idws->pools.gem; + buf->map_gtt = TRUE; } else if (type == INTEL_NEW_SCANOUT) { name = "gallium3d_scanout"; pool = idws->pools.gem; @@ -109,6 +110,18 @@ intel_drm_buffer_unmap(struct intel_winsys *iws, drm_intel_bo_unmap(intel_bo(buffer)); } +static int +intel_drm_buffer_write(struct intel_winsys *iws, + struct intel_buffer *buffer, + const void *data, + size_t size, + size_t offset) +{ + struct intel_drm_buffer *buf = intel_drm_buffer(buffer); + + return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); +} + static void intel_drm_buffer_destroy(struct intel_winsys *iws, struct intel_buffer *buffer) @@ -130,5 +143,6 @@ intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws) idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg; idws->base.buffer_map = intel_drm_buffer_map; idws->base.buffer_unmap = intel_drm_buffer_unmap; + idws->base.buffer_write = intel_drm_buffer_write; idws->base.buffer_destroy = intel_drm_buffer_destroy; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 07551e7cd16..7bf23cba236 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -32,6 +32,8 @@ #include "radeon_buffer.h" +#include "radeon_bo_gem.h" + static const char *radeon_get_name(struct pipe_winsys *ws) { return "Radeon/GEM+KMS"; @@ -99,6 +101,7 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { struct pipe_format_block block; @@ -134,8 +137,11 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; - if (!(flags & PIPE_BUFFER_USAGE_DONTBLOCK)) { - radeon_bo_wait(radeon_buffer->bo); + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { + uint32_t domain; + + if (radeon_bo_is_busy(radeon_buffer->bo, &domain)) + return NULL; } if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) { write = 1; @@ -187,7 +193,6 @@ static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys, struct radeon_winsys* radeon_pipe_winsys(int fd) { struct radeon_winsys* radeon_ws; - struct radeon_bo_manager* bom; radeon_ws = CALLOC_STRUCT(radeon_winsys); if (radeon_ws == NULL) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 47376a0f07b..a4011db0b87 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -98,7 +98,7 @@ struct pipe_buffer* radeon_buffer_from_handle(struct drm_api* api, return &radeon_buffer->base; } -struct pipe_texture* +static struct pipe_texture* radeon_texture_from_shared_handle(struct drm_api *api, struct pipe_screen *screen, struct pipe_texture *templ, @@ -116,20 +116,22 @@ radeon_texture_from_shared_handle(struct drm_api *api, return screen->texture_blanket(screen, templ, &stride, buffer); } -boolean radeon_shared_handle_from_texture(struct drm_api *api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) +static boolean radeon_shared_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) { int retval, fd; struct drm_gem_flink flink; struct radeon_pipe_buffer* radeon_buffer; - struct pipe_buffer* buffer = &radeon_buffer->base; - if (!radeon_buffer_from_texture(api, texture, buffer, stride)) { + struct pipe_buffer *buffer; + + if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { return FALSE; } + radeon_buffer = (struct radeon_pipe_buffer*)buffer; if (!radeon_buffer->flinked) { fd = ((struct radeon_winsys*)screen->winsys)->priv->fd; @@ -150,11 +152,11 @@ boolean radeon_shared_handle_from_texture(struct drm_api *api, return TRUE; } -boolean radeon_local_handle_from_texture(struct drm_api *api, - struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned *stride, - unsigned *handle) +static boolean radeon_local_handle_from_texture(struct drm_api *api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) { struct pipe_buffer *buffer; if (!radeon_buffer_from_texture(api, texture, &buffer, stride)) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index d7238762219..d2d84f1a8f0 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -137,7 +137,7 @@ static void do_ioctls(struct r300_winsys* winsys, int fd) int target = 0; int retval; - info.value = ⌖ + info.value = (unsigned long)⌖ /* First, get the number of pixel pipes */ info.request = RADEON_INFO_NUM_GB_PIPES; diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 33826524d7a..66120a6a983 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -166,6 +166,7 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { const unsigned alignment = 64; |