diff options
author | Nicolai Hähnle <[email protected]> | 2009-09-21 13:26:50 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2009-09-21 13:26:50 +0200 |
commit | 81c7561d9d3faf70ac22c6a5f3fbea18f53eed92 (patch) | |
tree | 5ec097e0e1fe99804104d3a56923c16edf1533aa /src/gallium/drivers | |
parent | f02f63997ce65530788a6dfcb28f11790a14d938 (diff) | |
parent | 3083ba38f4c884b32cd0460607b5064b6b7008d2 (diff) |
Merge branch 'master' into r300-compiler
There were additional non-textual conflicts.
Conflicts:
src/gallium/drivers/r300/r300_tgsi_to_rc.c
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
src/mesa/drivers/dri/r300/compiler/radeon_program.c
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
Diffstat (limited to 'src/gallium/drivers')
105 files changed, 4116 insertions, 1437 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index bd48ce70050..9161747fdbf 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 508f4560e48..b3a7774fd6a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -44,6 +44,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_fifo.h" #include "i915_context.h" #include "i915_reg.h" @@ -76,8 +77,13 @@ struct i915_vbuf_render { size_t vbo_size; size_t vbo_offset; void *vbo_ptr; - size_t vbo_alloc_size; size_t vbo_max_used; + + /* stuff for the pool */ + struct util_fifo *pool_fifo; + unsigned pool_used; + unsigned pool_buffer_size; + boolean pool_not_used; }; @@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) } static boolean +i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + + if (i915_render->vbo_size < size + i915_render->vbo_offset) + return FALSE; + + if (i915->vbo_flushed) + return FALSE; + + return TRUE; +} + +static void +i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915_render->vbo) { + if (i915_render->pool_not_used) + iws->buffer_destroy(iws, i915_render->vbo); + else + u_fifo_add(i915_render->pool_fifo, i915_render->vbo); + i915_render->vbo = NULL; + } + + i915->vbo_flushed = 0; + + i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_offset = 0; + + if (i915_render->vbo_size != i915_render->pool_buffer_size) { + i915_render->pool_not_used = TRUE; + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, + INTEL_NEW_VERTEX); + } else { + i915_render->pool_not_used = FALSE; + + if (i915_render->pool_used >= 2) { + FLUSH_BATCH(NULL); + i915->vbo_flushed = 0; + i915_render->pool_used = 0; + } + u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); + } +} + +static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ assert(!i915->vbo); - if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { - } else { - i915->vbo_flushed = 0; - if (i915_render->vbo) { - iws->buffer_destroy(iws, i915_render->vbo); - i915_render->vbo = NULL; - } - } + if (!i915_vbuf_render_reserve(i915_render, size)) { - if (!i915_render->vbo) { - i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + if (i915->vbo_flushed) + i915_render->pool_used = 0; + i915_vbuf_render_new_buf(i915_render, size); } i915_render->vertex_size = vertex_size; @@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); struct intel_winsys *iws = i915->iws; + int i; i915_render->i915 = i915; @@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - i915_render->vbo_alloc_size = 128 * 4096; - i915_render->vbo_size = i915_render->vbo_alloc_size; + + i915_render->vbo = NULL; + i915_render->vbo_size = 0; i915_render->vbo_offset = 0; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); + + i915_render->pool_used = FALSE; + i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_fifo = u_fifo_create(6); + for (i = 0; i < 6; i++) + u_fifo_add(i915_render->pool_fifo, + iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + INTEL_NEW_VERTEX)); + +#if 0 /* TODO JB: is this realy needed? */ i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); iws->buffer_unmap(iws, i915_render->vbo); +#endif return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 9f017a14cca..a1dd43c1bcc 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h index f949f52a9ce..42c5e7470ec 100644 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ b/src/gallium/drivers/i915simple/intel_winsys.h @@ -150,6 +150,17 @@ struct intel_winsys { void (*buffer_unmap)(struct intel_winsys *iws, struct intel_buffer *buffer); + /** + * Write to a buffer. + * + * Arguments follows pwrite(2) + */ + int (*buffer_write)(struct intel_winsys *iws, + struct intel_buffer *dst, + const void *src, + size_t size, + size_t offset); + void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index b22e105f106..fb68fd624b3 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 6e63a0c2b76..cd7b6356d28 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -3,6 +3,8 @@ include $(TOP)/configs/current LIBNAME = llvmpipe +CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS + C_SOURCES = \ lp_bld_alpha.c \ lp_bld_arit.c \ @@ -15,9 +17,11 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ lp_bld_tgsi_soa.c \ @@ -44,7 +48,8 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample.c \ + lp_tex_sample_c.c \ + lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 498d21dea6c..89d08834a3c 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -8,13 +8,16 @@ Done so far is: - the whole fragment pipeline is code generated in a single function + - input interpolation + - depth testing + - texture sampling (not all state/formats are supported) + - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception we don't fallback to TGSI interpretation when an unsupported opcode is found, but just ignore it - - texture sampling via an intrinsic call - done in SoA layout - input interpolation also code generated @@ -28,16 +31,17 @@ Done so far is: any width and length - not all operations are implemented for these types yet though -Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch, -which includes hand written fast implementations for common cases. +Most mesa/progs/demos/* work. To do (probably by this order): - code generate stipple and stencil testing - - code generate texture sampling + - translate the remaining bits of texture sampling state - translate TGSI control flow instructions, and all other remaining opcodes + + - integrate with the draw module for VS code generation - code generate the triangle setup and rasterization @@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as make linux-llvm -but the rest of these instructions assume scons is used. +but the rest of these instructions assume that scons is used. Using @@ -108,6 +112,9 @@ or export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH +For performance evaluation pass debug=no to scons, and use the corresponding +lib directory without the "-debug" suffix. + Unit testing ============ @@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe: - lp_test_conv: SIMD vector conversion - lp_test_format: pixel unpacking/packing -Some of this tests can output results and benchmarks to a tab-seperated-file +Some of this tests can output results and benchmarks to a tab-separated-file for posterior analysis, e.g.: build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv @@ -133,10 +140,10 @@ Development Notes at the top of the lp_bld_*.c functions. - All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a standalone Gallium state -> LLVM IR translation module. + put in a stand-alone Gallium state -> LLVM IR translation module. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - for a standalone example. + for a stand-alone example. diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 5c29bdac56e..f4a9a3b22e2 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() env.Tool('llvm') -if 'LLVM_VERSION' not in env: +if not env.has_key('LLVM_VERSION'): print 'warning: LLVM not found: not building llvmpipe' Return() @@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', 'lp_bld_swizzle.c', @@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample.c', + 'lp_tex_sample_c.c', + 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', 'lp_tile_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index 49c2f911af7..2b4bc5c819d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -45,7 +45,7 @@ void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 9dbcdb4daab..634575670db 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -38,14 +38,14 @@ #include <llvm-c/Core.h> struct pipe_alpha_state; -union lp_type; +struct lp_type; struct lp_build_mask_context; void lp_build_alpha_test(LLVMBuilderRef builder, const struct pipe_alpha_state *state, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, LLVMValueRef ref); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d5..0b115fc9b07 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -159,7 +159,7 @@ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->one) return bld->zero; @@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(a == bld->zero) @@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(b == bld->zero) @@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. @@ -565,21 +590,32 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? */ + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.width*type.length == 128) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } #endif @@ -588,10 +624,188 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, bld->one); + + return res; +} + + +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); +#endif + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -609,7 +823,7 @@ LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->undef; @@ -640,7 +854,7 @@ LLVMValueRef lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); @@ -661,7 +875,7 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -681,7 +895,7 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -752,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld, const double *coeffs, unsigned num_coeffs) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res = NULL; unsigned i; @@ -800,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; @@ -893,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index fc8cb25966e..d68a97c4b87 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1); + +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. + */ +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11); + +LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -86,6 +106,34 @@ lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index d19e18846c2..da272e549f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -46,7 +46,7 @@ struct pipe_blend_state; -union lp_type; +struct lp_type; struct lp_build_context; @@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, @@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef const_[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c11a9398f87..d14f468ba93 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src, LLVMValueRef dst, LLVMValueRef const_, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index b92254a7d6f..9511299d558 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, void lp_build_blend_soa(LLVMBuilderRef builder, const struct pipe_blend_state *blend, - union lp_type type, + struct lp_type type, LLVMValueRef src[4], LLVMValueRef dst[4], LLVMValueRef con[4], diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c index 21487365eae..c8eaa8c3940 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c @@ -42,7 +42,7 @@ unsigned -lp_mantissa(union lp_type type) +lp_mantissa(struct lp_type type) { assert(type.floating); @@ -72,7 +72,7 @@ lp_mantissa(union lp_type type) * Same as lp_const_scale(), but in terms of shifts. */ unsigned -lp_const_shift(union lp_type type) +lp_const_shift(struct lp_type type) { if(type.floating) return 0; @@ -86,7 +86,7 @@ lp_const_shift(union lp_type type) unsigned -lp_const_offset(union lp_type type) +lp_const_offset(struct lp_type type) { if(type.floating || type.fixed) return 0; @@ -104,7 +104,7 @@ lp_const_offset(union lp_type type) * else for the fixed points types and normalized integers. */ double -lp_const_scale(union lp_type type) +lp_const_scale(struct lp_type type) { unsigned long long llscale; double dscale; @@ -122,7 +122,7 @@ lp_const_scale(union lp_type type) * Minimum value representable by the type. */ double -lp_const_min(union lp_type type) +lp_const_min(struct lp_type type) { unsigned bits; @@ -158,7 +158,7 @@ lp_const_min(union lp_type type) * Maximum value representable by the type. */ double -lp_const_max(union lp_type type) +lp_const_max(struct lp_type type) { unsigned bits; @@ -190,7 +190,7 @@ lp_const_max(union lp_type type) double -lp_const_eps(union lp_type type) +lp_const_eps(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -211,7 +211,7 @@ lp_const_eps(union lp_type type) LLVMValueRef -lp_build_undef(union lp_type type) +lp_build_undef(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMGetUndef(vec_type); @@ -219,7 +219,7 @@ lp_build_undef(union lp_type type) LLVMValueRef -lp_build_zero(union lp_type type) +lp_build_zero(struct lp_type type) { LLVMTypeRef vec_type = lp_build_vec_type(type); return LLVMConstNull(vec_type); @@ -227,7 +227,7 @@ lp_build_zero(union lp_type type) LLVMValueRef -lp_build_one(union lp_type type) +lp_build_one(struct lp_type type) { LLVMTypeRef elem_type; LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -269,7 +269,7 @@ lp_build_one(union lp_type type) LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); @@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type, LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); @@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type, LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle) { @@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type, LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]) +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]) { LLVMTypeRef elem_type = LLVMIntType(type.width); LLVMValueRef masks[LP_MAX_VECTOR_LENGTH]; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index 1934530ea3c..ffb302f7366 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,67 +42,67 @@ #include <pipe/p_compiler.h> -union lp_type type; +struct lp_type type; unsigned -lp_mantissa(union lp_type type); +lp_mantissa(struct lp_type type); unsigned -lp_const_shift(union lp_type type); +lp_const_shift(struct lp_type type); unsigned -lp_const_offset(union lp_type type); +lp_const_offset(struct lp_type type); double -lp_const_scale(union lp_type type); +lp_const_scale(struct lp_type type); double -lp_const_min(union lp_type type); +lp_const_min(struct lp_type type); double -lp_const_max(union lp_type type); +lp_const_max(struct lp_type type); double -lp_const_eps(union lp_type type); +lp_const_eps(struct lp_type type); LLVMValueRef -lp_build_undef(union lp_type type); +lp_build_undef(struct lp_type type); LLVMValueRef -lp_build_zero(union lp_type type); +lp_build_zero(struct lp_type type); LLVMValueRef -lp_build_one(union lp_type type); +lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(union lp_type type, +lp_build_const_scalar(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(union lp_type type, +lp_build_int_const_scalar(struct lp_type type, long long val); LLVMValueRef -lp_build_const_aos(union lp_type type, +lp_build_const_aos(struct lp_type type, double r, double g, double b, double a, const unsigned char *swizzle); LLVMValueRef -lp_build_const_mask_aos(union lp_type type, - boolean cond[4]); +lp_build_const_mask_aos(struct lp_type type, + const boolean cond[4]); #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index c8954c8a34f..186cac70f62 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -86,7 +86,7 @@ */ LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src) { @@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, int shift = dst_width - n; res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); - /* Fill in the empty lower bits for added precision? */ + /* TODO: Fill in the empty lower bits for additional precision? */ #if 0 { LLVMValueRef msb; @@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src) { LLVMTypeRef vec_type = lp_build_vec_type(dst_type); @@ -244,12 +244,12 @@ lp_build_const_pack_shuffle(unsigned n) * Expand the bit width. * * This will only change the number of bits the values are represented, not the - * values themselved. + * values themselves. */ static void lp_build_expand(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, LLVMValueRef src, LLVMValueRef *dst, unsigned num_dsts) { @@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder, dst[0] = src; while(src_type.width < dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; LLVMTypeRef new_vec_type; new_type.width *= 2; @@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder, */ static LLVMValueRef lp_build_pack2(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, boolean clamped, LLVMValueRef lo, LLVMValueRef hi) @@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder, * TODO: Handle saturation consistently. */ static LLVMValueRef -lp_build_trunc(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) { LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned i; @@ -410,7 +410,7 @@ lp_build_trunc(LLVMBuilderRef builder, tmp[i] = src[i]; while(src_type.width > dst_type.width) { - union lp_type new_type = src_type; + struct lp_type new_type = src_type; new_type.width /= 2; new_type.length *= 2; @@ -442,12 +442,12 @@ lp_build_trunc(LLVMBuilderRef builder, */ void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { - union lp_type tmp_type; + struct lp_type tmp_type; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; unsigned num_tmps; unsigned i; @@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder, * Clamp if necessary */ - if(src_type.value != dst_type.value) { + if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) { struct lp_build_context bld; double src_min = lp_const_min(src_type); double dst_min = lp_const_min(dst_type); @@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width > dst_type.width) { assert(num_dsts == 1); - tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); + tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = 1; @@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder, */ void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts) { @@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, if(src_type.width > dst_type.width) { assert(num_dsts == 1); - dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs); + dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs); } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index 05c1ef2a100..ca378804d2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,33 +40,33 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; LLVMValueRef lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, - union lp_type src_type, + struct lp_type src_type, unsigned dst_width, LLVMValueRef src); LLVMValueRef lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, unsigned src_width, - union lp_type dst_type, + struct lp_type dst_type, LLVMValueRef src); void lp_build_conv(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *srcs, unsigned num_srcs, LLVMValueRef *dsts, unsigned num_dsts); void lp_build_conv_mask(LLVMBuilderRef builder, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, const LLVMValueRef *src, unsigned num_srcs, LLVMValueRef *dst, unsigned num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 30925b5f415..59d8f492e60 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -30,10 +30,27 @@ #include <udis86.h> #endif +#include "util/u_math.h" #include "util/u_debug.h" #include "lp_bld_debug.h" +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_pot(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + void lp_disassemble(const void* func) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h index ecdafef76d0..583e6132b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h @@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...) } +boolean +lp_check_alignment(const void *ptr, unsigned alignment); + + void lp_disassemble(const void* func); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2cd6e6b9217..21c665c4d4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -71,11 +71,11 @@ /** * Return a type appropriate for depth/stencil testing. */ -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length) { - union lp_type type; + struct lp_type type; unsigned swizzle; assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); @@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc, swizzle = format_desc->swizzle[0]; assert(swizzle < 4); - type.value = 0; + memset(&type, 0, sizeof type); type.width = format_desc->block.bits; if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { @@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, @@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder, padding_right = 0; for(chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size; + padding_left = format_desc->block.bits - + (padding_right + format_desc->channel[z_swizzle].size); if(padding_left || padding_right) { - const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1; - const long long mask_right = ((long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right); + const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; + const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; + z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); } if(padding_left) @@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder, LLVMBuildStore(builder, dst, dst_ptr); } + /* FIXME */ assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 5d2e042fcc5..79d6981bb51 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -41,11 +41,11 @@ struct pipe_depth_state; struct util_format_description; -union lp_type; +struct lp_type; struct lp_build_mask_context; -union lp_type +struct lp_type lp_depth_type(const struct util_format_description *format_desc, unsigned length); @@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc, void lp_build_depth_test(LLVMBuilderRef builder, const struct pipe_depth_state *state, - union lp_type type, + struct lp_type type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef src, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index 9d99e1a9d9f..dcc25fbff86 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -32,59 +32,261 @@ */ #include "util/u_debug.h" +#include "util/u_memory.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#define LP_BUILD_FLOW_MAX_VARIABLES 32 +#define LP_BUILD_FLOW_MAX_DEPTH 32 + + +/** + * Enumeration of all possible flow constructs. + */ +enum lp_build_flow_construct_kind { + lP_BUILD_FLOW_SCOPE, + LP_BUILD_FLOW_SKIP, +}; + + +/** + * Variable declaration scope. + */ +struct lp_build_flow_scope +{ + /** Number of variables declared in this scope */ + unsigned num_variables; +}; + + +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_flow_skip +{ + /** Block to skip to */ + LLVMBasicBlockRef block; + + /** Number of variables declared at the beginning */ + unsigned num_variables; + + LLVMValueRef *phi; +}; + + +/** + * Union of all possible flow constructs' data + */ +union lp_build_flow_construct_data +{ + struct lp_build_flow_scope scope; + struct lp_build_flow_skip skip; +}; + + +/** + * Element of the flow construct stack. + */ +struct lp_build_flow_construct +{ + enum lp_build_flow_construct_kind kind; + union lp_build_flow_construct_data data; +}; + + +/** + * All necessary data to generate LLVM control flow constructs. + * + * Besides keeping track of the control flow construct themselves we also + * need to keep track of variables in order to generate SSA Phi values. + */ +struct lp_build_flow_context +{ + LLVMBuilderRef builder; + + /** + * Control flow stack. + */ + struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; + unsigned num_constructs; + + /** + * Variable stack + */ + LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; + unsigned num_variables; +}; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder) +{ + struct lp_build_flow_context *flow; + + flow = CALLOC_STRUCT(lp_build_flow_context); + if(!flow) + return NULL; + + flow->builder = builder; + + return flow; +} + + void -lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, - LLVMValueRef value) +lp_build_flow_destroy(struct lp_build_flow_context *flow) { - memset(mask, 0, sizeof *mask); + assert(flow->num_constructs == 0); + assert(flow->num_variables == 0); + FREE(flow); +} - mask->builder = builder; - mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + +static union lp_build_flow_construct_data * +lp_build_flow_push(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); + if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) + return NULL; + + flow->constructs[flow->num_constructs].kind = kind; + return &flow->constructs[flow->num_constructs++].data; +} + + +static union lp_build_flow_construct_data * +lp_build_flow_peek(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[flow->num_constructs - 1].data; } +static union lp_build_flow_construct_data * +lp_build_flow_pop(struct lp_build_flow_context *flow, + enum lp_build_flow_construct_kind kind) +{ + assert(flow->num_constructs); + if(!flow->num_constructs) + return NULL; + + assert(flow->constructs[flow->num_constructs - 1].kind == kind); + if(flow->constructs[flow->num_constructs - 1].kind != kind) + return NULL; + + return &flow->constructs[--flow->num_constructs].data; +} + + +/** + * Begin a variable scope. + * + * + */ void -lp_build_mask_update(struct lp_build_mask_context *mask, - LLVMValueRef value) +lp_build_flow_scope_begin(struct lp_build_flow_context *flow) { + struct lp_build_flow_scope *scope; - LLVMValueRef cond; - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; + scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(mask->value) - mask->value = LLVMBuildAnd(mask->builder, mask->value, value, ""); - else - mask->value = value; + scope->num_variables = 0; +} - /* FIXME: disabled until we have proper control flow helpers */ -#if 0 - cond = LLVMBuildICmp(mask->builder, - LLVMIntEQ, - LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""), - LLVMConstNull(mask->reg_type), - ""); - current_block = LLVMGetInsertBlock(mask->builder); +/** + * Declare a variable. + * + * A variable is a named entity which can have different LLVMValueRef's at + * different points of the program. This is relevant for control flow because + * when there are mutiple branches to a same location we need to replace + * the variable's value with a Phi function as explained in + * http://en.wikipedia.org/wiki/Static_single_assignment_form . + * + * We keep track of variables by keeping around a pointer to where their + * current. + * + * There are a few cautions to observe: + * + * - Variable's value must not be NULL. If there is no initial value then + * LLVMGetUndef() should be used. + * + * - Variable's value must be kept up-to-date. If the variable is going to be + * modified by a function then a pointer should be passed so that its value + * is accurate. Failure to do this will cause some of the variables' + * transient values to be lost, leading to wrong results. + * + * - A program should be written from top to bottom, by always appending + * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or + * modifying existing statements will most likely lead to wrong results. + * + */ +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - if(!mask->skip_block) { - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - mask->skip_block = LLVMAppendBasicBlock(function, "skip"); + assert(*variable); + if(!*variable) + return; + + assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); + if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) + return; + + flow->variables[flow->num_variables++] = variable; + ++scope->num_variables; +} + + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_scope *scope; + + scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope; + if(!scope) + return; - mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), ""); + assert(flow->num_variables >= scope->num_variables); + if(flow->num_variables < scope->num_variables) { + flow->num_variables = 0; + return; } + flow->num_variables -= scope->num_variables; +} + + +static LLVMBasicBlockRef +lp_build_flow_insert_block(struct lp_build_flow_context *flow) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + current_block = LLVMGetInsertBlock(flow->builder); + next_block = LLVMGetNextBasicBlock(current_block); - assert(next_block); if(next_block) { new_block = LLVMInsertBasicBlock(next_block, ""); } @@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask, new_block = LLVMAppendBasicBlock(function, ""); } - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block); + return new_block; +} + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +{ + struct lp_build_flow_skip *skip; + LLVMBuilderRef builder; + unsigned i; + + skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + skip->block = lp_build_flow_insert_block(flow); + skip->num_variables = flow->num_variables; + if(!skip->num_variables) { + skip->phi = NULL; + return; + } - LLVMPositionBuilderAtEnd(mask->builder, new_block); -#endif + skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); + if(!skip->phi) { + skip->num_variables = 0; + return; + } + + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, skip->block); + + for(i = 0; i < skip->num_variables; ++i) + skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); + + LLVMDisposeBuilder(builder); } -LLVMValueRef -lp_build_mask_end(struct lp_build_mask_context *mask) +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond) +{ + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef new_block; + unsigned i; + + skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; + + current_block = LLVMGetInsertBlock(flow->builder); + + new_block = lp_build_flow_insert_block(flow); + + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + } + + LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + + LLVMPositionBuilderAtEnd(flow->builder, new_block); + } + + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow) { - if(mask->skip_block) { - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder); + struct lp_build_flow_skip *skip; + LLVMBasicBlockRef current_block; + unsigned i; - LLVMAddIncoming(mask->phi, &mask->value, ¤t_block, 1); - LLVMBuildBr(mask->builder, mask->skip_block); + skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; + if(!skip) + return; - LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block); + current_block = LLVMGetInsertBlock(flow->builder); - mask->value = mask->phi; - mask->phi = NULL; - mask->skip_block = NULL; + for(i = 0; i < skip->num_variables; ++i) { + assert(*flow->variables[i]); + LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); + *flow->variables[i] = skip->phi[i]; } + LLVMBuildBr(flow->builder, skip->block); + LLVMPositionBuilderAtEnd(flow->builder, skip->block); + + FREE(skip->phi); +} + + +static void +lp_build_mask_check(struct lp_build_mask_context *mask) +{ + LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef cond; + + cond = LLVMBuildICmp(builder, + LLVMIntEQ, + LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMConstNull(mask->reg_type), + ""); + + lp_build_flow_skip_cond_break(mask->flow, cond); +} + + +void +lp_build_mask_begin(struct lp_build_mask_context *mask, + struct lp_build_flow_context *flow, + struct lp_type type, + LLVMValueRef value) +{ + memset(mask, 0, sizeof *mask); + + mask->flow = flow; + mask->reg_type = LLVMIntType(type.width * type.length); + mask->value = value; + + lp_build_flow_scope_begin(flow); + lp_build_flow_scope_declare(flow, &mask->value); + lp_build_flow_skip_begin(flow); + + lp_build_mask_check(mask); +} + + +void +lp_build_mask_update(struct lp_build_mask_context *mask, + LLVMValueRef value) +{ + mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + + lp_build_mask_check(mask); +} + + +LLVMValueRef +lp_build_mask_end(struct lp_build_mask_context *mask) +{ + lp_build_flow_skip_end(mask->flow); + lp_build_flow_scope_end(mask->flow); return mask->value; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h index 1b634ff038d..e61999ff06b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h @@ -38,27 +38,53 @@ #include <llvm-c/Core.h> -union lp_type; +struct lp_type; + + +struct lp_build_flow_context; + + +struct lp_build_flow_context * +lp_build_flow_create(LLVMBuilderRef builder); + +void +lp_build_flow_destroy(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_scope_declare(struct lp_build_flow_context *flow, + LLVMValueRef *variable); + +void +lp_build_flow_scope_end(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_begin(struct lp_build_flow_context *flow); + +void +lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, + LLVMValueRef cond); + +void +lp_build_flow_skip_end(struct lp_build_flow_context *flow); struct lp_build_mask_context { - LLVMBuilderRef builder; + struct lp_build_flow_context *flow; LLVMTypeRef reg_type; LLVMValueRef value; - - LLVMValueRef phi; - - LLVMBasicBlockRef skip_block; }; void lp_build_mask_begin(struct lp_build_mask_context *mask, - LLVMBuilderRef builder, - union lp_type type, + struct lp_build_flow_context *flow, + struct lp_type type, LLVMValueRef value); /** diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 01c8a752d18..6d3f6926193 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -31,21 +31,15 @@ /** * @file - * LLVM IR building helpers interfaces. - * - * We use LLVM-C bindings for now. They are not documented, but follow the C++ - * interfaces very closely, and appear to be complete enough for code - * genration. See - * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html - * for a standalone example. + * Pixel format helpers. */ #include <llvm-c/Core.h> - -#include "pipe/p_format.h" +#include "pipe/p_format.h" -union lp_type; +struct util_format_description; +struct lp_type; /** @@ -56,9 +50,9 @@ union lp_type; * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed); /** @@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba); /** @@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, * @return RGBA in a 4 floats vector. */ LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr); /** @@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder, * @param rgba 4 float vector with the unpacked components. */ void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba); + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba); #endif /* !LP_BLD_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index dcbc0076c7d..b9b5d84bed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -32,9 +32,9 @@ LLVMValueRef -lp_build_unpack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed) +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef packed) { const struct util_format_description *desc; LLVMTypeRef type; @@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_pack_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba) +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder, LLVMValueRef -lp_build_load_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) +lp_build_load_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr) { const struct util_format_description *desc; LLVMTypeRef type; @@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, ""); - return lp_build_unpack_rgba(builder, format, packed); + return lp_build_unpack_rgba_aos(builder, format, packed); } void -lp_build_store_rgba(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) +lp_build_store_rgba_aos(LLVMBuilderRef builder, + enum pipe_format format, + LLVMValueRef ptr, + LLVMValueRef rgba) { const struct util_format_description *desc; LLVMTypeRef type; @@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder, type = LLVMIntType(desc->block.bits); - packed = lp_build_pack_rgba(builder, format, rgba); + packed = lp_build_pack_rgba_aos(builder, format, rgba); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c new file mode 100644 index 00000000000..b5ff434e1ae --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_format.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_format.h" + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +static LLVMValueRef +lp_build_format_swizzle(struct lp_type type, + const LLVMValueRef *inputs, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + return inputs[swizzle]; + case UTIL_FORMAT_SWIZZLE_0: + return lp_build_zero(type); + case UTIL_FORMAT_SWIZZLE_1: + return lp_build_one(type); + case UTIL_FORMAT_SWIZZLE_NONE: + return lp_build_undef(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +void +lp_build_unpack_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef inputs[4]; + unsigned start; + unsigned chan; + + /* FIXME: Support more formats */ + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + /* Decode the input vector components */ + start = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned width = format_desc->channel[chan].size; + unsigned stop = start + width; + LLVMValueRef input; + + input = packed; + + switch(format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + input = NULL; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + if(type.floating) { + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + if(stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + } + + if(format_desc->channel[chan].normalized) + input = lp_build_unsigned_norm_to_float(builder, width, type, input); + else + input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + default: + /* fall through */ + input = lp_build_undef(type); + break; + } + + inputs[chan] = input; + + start = stop; + } + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); + rgba[2] = rgba[1] = rgba[0] = depth; + rgba[3] = lp_build_one(type); + } + else { + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); + } + } +} + + +void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + LLVMValueRef packed; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= 32); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index cfe20a0d75b..338dbca6d1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -292,7 +292,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 9194f6233a7..9c57a10879b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -83,7 +83,7 @@ void lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct tgsi_token *tokens, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 8631efd6c3e..6b6f8207697 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); @@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - union lp_type type = bld->type; + struct lp_type type = bld->type; LLVMValueRef res; if(a == b) @@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld, b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); } - /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */ - a = LLVMBuildAnd(bld->builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is @@ -339,9 +337,9 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]) + const boolean cond[4]) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); } + else { #if 0 - else if(0) { - /* FIXME: Unfortunately select of vectors do not work */ + /* XXX: Unfortunately select of vectors do not work */ /* Use a select */ LLVMTypeRef elem_type = LLVMInt1Type(); LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; @@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld, cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); - } -#endif - else { +#else LLVMValueRef mask = lp_build_const_mask_aos(type, cond); return lp_build_select(bld, mask, a, b); +#endif } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index 29b9e1c45b8..a4ee7723b5f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -66,7 +66,7 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - boolean cond[4]); + const boolean cond[4]); #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h new file mode 100644 index 00000000000..403d0e48367 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#ifndef LP_BLD_SAMPLE_H +#define LP_BLD_SAMPLE_H + + +#include <llvm-c/Core.h> + +struct pipe_texture; +struct pipe_sampler_state; +struct lp_type; + + +/** + * Sampler static state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are embedded in the generated code. + */ +struct lp_sampler_static_state +{ + /* pipe_texture's state */ + enum pipe_format format; + unsigned target:2; + unsigned pot_width:1; + unsigned pot_height:1; + unsigned pot_depth:1; + + /* pipe_sampler_state's state */ + unsigned wrap_s:3; + unsigned wrap_t:3; + unsigned wrap_r:3; + unsigned min_img_filter:2; + unsigned min_mip_filter:2; + unsigned mag_img_filter:2; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned normalized_coords:1; + unsigned prefilter:4; +}; + + +/** + * Sampler dynamic state. + * + * These are the bits of state from pipe_texture and pipe_sampler_state that + * are computed in runtime. + * + * There are obtained through callbacks, as we don't want to tie the texture + * sampling code generation logic to any particular texture layout or pipe + * driver. + */ +struct lp_sampler_dynamic_state +{ + + /** Obtain the base texture width. */ + LLVMValueRef + (*width)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + /** Obtain the base texture height. */ + LLVMValueRef + (*height)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + + LLVMValueRef + (*data_ptr)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); + +}; + + +/** + * Derive the sampler static state. + */ +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler); + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct lp_type fp_type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); + + + +#endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c new file mode 100644 index 00000000000..8ca1be6f1be --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -0,0 +1,416 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + + +/** + * Keep all information for sampling code generation in a single place. + */ +struct lp_build_sample_context +{ + LLVMBuilderRef builder; + + const struct lp_sampler_static_state *static_state; + + struct lp_sampler_dynamic_state *dynamic_state; + + const struct util_format_description *format_desc; + + /** Incoming coordinates type and build context */ + struct lp_type coord_type; + struct lp_build_context coord_bld; + + /** Integer coordinates */ + struct lp_type int_coord_type; + struct lp_build_context int_coord_bld; + + /** Output texels type and build context */ + struct lp_type texel_type; + struct lp_build_context texel_bld; +}; + + +static void +lp_build_sample_texel(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); + + if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); + + x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(int_coord_bld, x, x_stride); + y_offset = lp_build_mul(int_coord_bld, y, y_stride); + + offset = lp_build_add(int_coord_bld, x_offset, y_offset); + } + + lp_build_load_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, + offset, + texel); +} + + +static LLVMValueRef +lp_build_sample_wrap(struct lp_build_sample_context *bld, + LLVMValueRef coord, + LLVMValueRef length, + boolean is_pot, + unsigned wrap_mode) +{ + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if(is_pot) + coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); + else + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord = LLVMBuildURem(bld->builder, coord, length, ""); + break; + + case PIPE_TEX_WRAP_CLAMP: + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* FIXME */ + _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); + coord = lp_build_min(int_coord_bld, coord, length_minus_one); + break; + + default: + assert(0); + } + + return coord; +} + + +static void +lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef x; + LLVMValueRef y; + + x = lp_build_ifloor(&bld->coord_bld, s); + y = lp_build_ifloor(&bld->coord_bld, t); + + x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); +} + + +static void +lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMValueRef half; + LLVMValueRef s_ipart; + LLVMValueRef t_ipart; + LLVMValueRef s_fpart; + LLVMValueRef t_fpart; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2][4]; + unsigned chan; + + half = lp_build_const_scalar(bld->coord_type, 0.5); + s = lp_build_sub(&bld->coord_bld, s, half); + t = lp_build_sub(&bld->coord_bld, t, half); + + s_ipart = lp_build_floor(&bld->coord_bld, s); + t_ipart = lp_build_floor(&bld->coord_bld, t); + + s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); + t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); + + x0 = lp_build_int(&bld->coord_bld, s_ipart); + y0 = lp_build_int(&bld->coord_bld, t_ipart); + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + + /* TODO: Don't interpolate missing channels */ + for(chan = 0; chan < 4; ++chan) { + texel[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } +} + + +static void +lp_build_sample_compare(struct lp_build_sample_context *bld, + LLVMValueRef p, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + LLVMValueRef res; + unsigned chan; + + if(!bld->static_state->compare_mode) + return; + + /* TODO: Compare before swizzling, to avoid redundant computations */ + res = NULL; + for(chan = 0; chan < 4; ++chan) { + LLVMValueRef cmp; + cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); + cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); + + if(res) + res = lp_build_add(texel_bld, res, cmp); + else + res = cmp; + } + + assert(res); + res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for(chan = 0; chan < 3; ++chan) + texel[chan] = res; + texel[3] = texel_bld->one; +} + + +void +lp_build_sample_soa(LLVMBuilderRef builder, + const struct lp_sampler_static_state *static_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_build_sample_context bld; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef stride; + LLVMValueRef data_ptr; + LLVMValueRef s; + LLVMValueRef t; + LLVMValueRef p; + + /* Setup our build context */ + memset(&bld, 0, sizeof bld); + bld.builder = builder; + bld.static_state = static_state; + bld.dynamic_state = dynamic_state; + bld.format_desc = util_format_description(static_state->format); + bld.coord_type = type; + bld.int_coord_type = lp_int_type(type); + bld.texel_type = type; + lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); + lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); + lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); + + /* Get the dynamic state */ + width = dynamic_state->width(dynamic_state, builder, unit); + height = dynamic_state->height(dynamic_state, builder, unit); + stride = dynamic_state->stride(dynamic_state, builder, unit); + data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + + s = coords[0]; + t = coords[1]; + p = coords[2]; + + width = lp_build_broadcast_scalar(&bld.int_coord_bld, width); + height = lp_build_broadcast_scalar(&bld.int_coord_bld, height); + stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride); + + if(static_state->target == PIPE_TEXTURE_1D) + t = bld.coord_bld.zero; + + if(static_state->normalized_coords) { + LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type); + LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, ""); + LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, ""); + s = lp_build_mul(&bld.coord_bld, s, fp_width); + t = lp_build_mul(&bld.coord_bld, t, fp_height); + } + + switch (static_state->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + break; + default: + assert(0); + } + + /* FIXME: respect static_state->min_mip_filter */; + /* FIXME: respect static_state->mag_img_filter */; + /* FIXME: respect static_state->prefilter */; + + lp_build_sample_compare(&bld, p, texel); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c index 14d2b10df9c..3998ac374fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c @@ -42,17 +42,30 @@ LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name) +{ + LLVMValueRef indices[2]; + LLVMValueRef member_ptr; + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); + member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name); + return member_ptr; +} + + +LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, unsigned member, const char *name) { - LLVMValueRef indices[2]; LLVMValueRef member_ptr; LLVMValueRef res; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0); - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); + member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name); res = LLVMBuildLoad(builder, member_ptr, ""); lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name); return res; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h index cbefdc9f815..740392f5611 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h @@ -53,6 +53,18 @@ offsetof(_ctype, _cmember)) +/** + * Get value pointer to a structure member. + */ +LLVMValueRef +lp_build_struct_get_ptr(LLVMBuilderRef builder, + LLVMValueRef ptr, + unsigned member, + const char *name); + +/** + * Get the value of a structure member. + */ LLVMValueRef lp_build_struct_get(LLVMBuilderRef builder, LLVMValueRef ptr, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index ac7eed9379a..64e81f7b1fe 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -64,7 +64,7 @@ LLVMValueRef lp_build_broadcast_scalar(struct lp_build_context *bld, LLVMValueRef scalar) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; unsigned i; @@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef a, unsigned channel) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; @@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - union lp_type type4 = type; + struct lp_type type4 = type; const char shifts[4][2] = { { 1, 2}, {-1, 2}, @@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -192,7 +192,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]) + const unsigned char swizzle[4]) { const unsigned n = bld->type.length; unsigned i, j; @@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld, return lp_build_swizzle1_aos(bld, a, swizzle); if(a == b) { - swizzle[0] %= 4; - swizzle[1] %= 4; - swizzle[2] %= 4; - swizzle[3] %= 4; - return lp_build_swizzle1_aos(bld, a, swizzle); + unsigned char swizzle1[4]; + swizzle1[0] = swizzle[0] % 4; + swizzle1[1] = swizzle[1] % 4; + swizzle1[2] = swizzle[2] % 4; + swizzle1[3] = swizzle[3] % 4; + return lp_build_swizzle1_aos(bld, a, swizzle1); } if(swizzle[0] % 4 == 0 && diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index d7dd6a8a604..1f6da804488 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -union lp_type type; +struct lp_type type; struct lp_build_context; @@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef lp_build_swizzle1_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); /** @@ -85,7 +85,7 @@ LLVMValueRef lp_build_swizzle2_aos(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b, - unsigned char swizzle[4]); + const unsigned char swizzle[4]); #endif /* !LP_BLD_SWIZZLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h index 912db24aecb..eddb7a83fa2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h @@ -39,31 +39,46 @@ struct tgsi_token; -union lp_type; +struct lp_type; struct lp_build_context; struct lp_build_mask_context; -typedef void -(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel); +/** + * Sampler code generation interface. + * + * Although texture sampling is a requirement for TGSI translation, it is + * a very different problem with several different approaches to it. This + * structure establishes an interface for texture sampling code generation, so + * that we can easily use different texture sampling strategies. + */ +struct lp_build_sampler_soa +{ + void + (*destroy)( struct lp_build_sampler_soa *sampler ); + + void + (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel); +}; + void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context); + struct lp_build_sampler_soa *sampler); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index d4d18febec7..adc81569ed5 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -78,6 +78,11 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + struct lp_build_tgsi_soa_context { @@ -88,8 +93,7 @@ struct lp_build_tgsi_soa_context const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef (*outputs)[NUM_CHANNELS]; - lp_emit_fetch_texel_soa_callback emit_fetch_texel; - void *emit_fetch_texel_context; + struct lp_build_sampler_soa *sampler; LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; @@ -98,6 +102,51 @@ struct lp_build_tgsi_soa_context }; +static const unsigned char +swizzle_left[4] = { + QUAD_TOP_LEFT, QUAD_TOP_LEFT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT +}; + +static const unsigned char +swizzle_right[4] = { + QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, + QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT +}; + +static const unsigned char +swizzle_top[4] = { + QUAD_TOP_LEFT, QUAD_TOP_RIGHT, + QUAD_TOP_LEFT, QUAD_TOP_RIGHT +}; + +static const unsigned char +swizzle_bottom[4] = { + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, + QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT +}; + + +static LLVMValueRef +emit_ddx(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); + LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); + return lp_build_sub(&bld->base, src_right, src_left); +} + + +static LLVMValueRef +emit_ddy(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef src) +{ + LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); + LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); + return lp_build_sub(&bld->base, src_top, src_bottom); +} + + /** * Register fetch. */ @@ -168,6 +217,7 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: + /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); res = LLVMBuildNeg( bld->base.builder, res, "" ); break; @@ -185,6 +235,36 @@ emit_fetch( /** + * Register fetch with derivatives. + */ +static void +emit_fetch_deriv( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + const unsigned chan_index, + LLVMValueRef *res, + LLVMValueRef *ddx, + LLVMValueRef *ddy) +{ + LLVMValueRef src; + + src = emit_fetch(bld, inst, index, chan_index); + + if(res) + *res = src; + + /* TODO: use interpolation coeffs for inputs */ + + if(ddx) + *ddx = emit_ddx(bld, src); + + if(ddy) + *ddy = emit_ddy(bld, src); +} + + +/** * Register store. */ static void @@ -239,17 +319,18 @@ emit_store( * High-level instruction translators. */ + static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, boolean apply_lodbias, - boolean projected) + boolean projected, + LLVMValueRef *texel) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; - LLVMValueRef texel[4]; unsigned num_coords; unsigned i; @@ -289,12 +370,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, coords[i] = lp_build_mul(&bld->base, coords[i], oow); } - bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context, - unit, num_coords, coords, lodbias, texel); - - FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) { - emit_store( bld, inst, 0, i, texel[i] ); - } + bld->sampler->emit_fetch_texel(bld->sampler, + bld->base.builder, + bld->base.type, + unit, num_coords, coords, lodbias, + texel); } @@ -347,14 +427,6 @@ emit_kil( } -static void -emit_kilp( - struct lp_build_tgsi_soa_context *bld ) -{ - /* XXX todo / fix me */ -} - - /** * Check if inst src/dest regs use indirect addressing into temporary * register file. @@ -382,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) static int emit_instruction( struct lp_build_tgsi_soa_context *bld, - struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info) { unsigned chan_index; LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - LLVMValueRef dst0; + LLVMValueRef res; + LLVMValueRef dst0[NUM_CHANNELS]; /* we can't handle indirect addressing into temp register file yet */ if (indirect_temp_reference(inst)) return FALSE; + assert(info->num_dst <= 1); + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.undef; + } + } + switch (inst->Instruction.Opcode) { #if 0 case TGSI_OPCODE_ARL: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_flr(bld, 0, 0); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif @@ -408,19 +490,17 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } break; case TGSI_OPCODE_LIT: if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { - emit_store( bld, inst, 0, CHAN_X, bld->base.one); + dst0[CHAN_X] = bld->base.one; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_max( &bld->base, src0, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Y, dst0); + dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { /* XMM[1] = SrcReg[0].yyyy */ @@ -432,20 +512,19 @@ emit_instruction( tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - emit_store( bld, inst, 0, CHAN_Z, dst0); + dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { - emit_store( bld, inst, 0, CHAN_W, bld->base.one); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); - dst0 = lp_build_rcp(&bld->base, src0); + res = lp_build_rcp(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -453,9 +532,9 @@ emit_instruction( /* TGSI_OPCODE_RECIPSQRT */ src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs(&bld->base, src0); - dst0 = lp_build_rsqrt(&bld->base, src0); + res = lp_build_rsqrt(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -479,16 +558,15 @@ emit_instruction( lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = tmp1; if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -514,20 +592,18 @@ emit_instruction( /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = tmp0; /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { - tmp1 = lp_build_div( &bld->base, src0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp1); + dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); } /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) - emit_store( bld, inst, 0, CHAN_Z, tmp2); + dst0[CHAN_Z] = tmp2; } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; @@ -535,8 +611,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_mul(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); } break; @@ -544,8 +619,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_add(&bld->base, src0, src1); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_add(&bld->base, src0, src1); } break; @@ -563,7 +637,7 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -585,28 +659,24 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DST: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = bld->base.one; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = emit_fetch( bld, inst, 0, CHAN_Z ); - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = emit_fetch( bld, inst, 1, CHAN_W ); - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); } break; @@ -614,8 +684,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_min( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); } break; @@ -623,8 +692,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0 = lp_build_max( &bld->base, src0, src1 ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); } break; @@ -634,8 +702,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -645,8 +712,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -658,7 +724,7 @@ emit_instruction( tmp2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); tmp0 = lp_build_add( &bld->base, tmp0, tmp2); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -666,8 +732,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); } break; @@ -678,13 +743,19 @@ emit_instruction( src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_sub( &bld->base, src1, src2 ); tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0 = lp_build_add( &bld->base, tmp0, src2 ); - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); } break; case TGSI_OPCODE_CND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + src0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); + } break; case TGSI_OPCODE_DP2A: @@ -698,45 +769,49 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; -#if 0 case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_frc( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + src0 = emit_fetch( bld, inst, 0, chan_index ); + tmp0 = lp_build_floor(&bld->base, src0); + tmp0 = lp_build_sub(&bld->base, tmp0, src0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_CLAMP: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + src1 = emit_fetch( bld, inst, 1, chan_index ); + src2 = emit_fetch( bld, inst, 2, chan_index ); + tmp0 = lp_build_max(&bld->base, tmp0, src1); + tmp0 = lp_build_min(&bld->base, tmp0, src2); + dst0[chan_index] = tmp0; + } break; case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_floor(&bld->base, tmp0); } break; case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_round(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_EX2: { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_exp2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; } @@ -745,16 +820,16 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_log2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_POW: src0 = emit_fetch( bld, inst, 0, CHAN_X ); src1 = emit_fetch( bld, inst, 1, CHAN_X ); - dst0 = lp_build_pow( &bld->base, src0, src1 ); + res = lp_build_pow( &bld->base, src0, src1 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0 ); + dst0[chan_index] = res; } break; @@ -775,7 +850,7 @@ emit_instruction( tmp5 = tmp3; tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - emit_store( bld, inst, 0, CHAN_X, tmp2); + dst0[CHAN_X] = tmp2; } if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { @@ -786,31 +861,30 @@ emit_instruction( tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - emit_store( bld, inst, 0, CHAN_Y, tmp3); + dst0[CHAN_Y] = tmp3; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - emit_store( bld, inst, 0, CHAN_Z, tmp5); + dst0[CHAN_Z] = tmp5; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_abs( &bld->base, tmp0 ) ; - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); } break; case TGSI_OPCODE_RCC: + /* deprecated? */ + assert(0); return 0; - break; case TGSI_OPCODE_DPH: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -827,7 +901,7 @@ emit_instruction( tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -835,25 +909,27 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_cos( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; case TGSI_OPCODE_DDX: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); + } break; case TGSI_OPCODE_DDY: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); + } break; -#if 0 case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld ); - return 0; /* XXX fix me */ + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_KIL: /* conditional kill */ @@ -885,13 +961,14 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_SFL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } break; case TGSI_OPCODE_SGT: @@ -899,8 +976,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -908,7 +984,7 @@ emit_instruction( tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); tmp0 = lp_build_sin( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; @@ -917,8 +993,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; @@ -927,85 +1002,99 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); } break; case TGSI_OPCODE_STR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.one; + } break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE ); + emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; case TGSI_OPCODE_TXD: + /* FIXME */ return 0; break; case TGSI_OPCODE_UP2H: + /* deprecated */ + assert (0); return 0; break; case TGSI_OPCODE_UP2US: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4B: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_UP4UB: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_X2D: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_ARA: + /* deprecated */ + assert(0); return 0; break; #if 0 case TGSI_OPCODE_ARR: + /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); emit_f2it( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = tmp0; } break; #endif case TGSI_OPCODE_BRA: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_CAL: + /* FIXME */ return 0; break; -#if 0 case TGSI_OPCODE_RET: - emit_ret( bld ); + /* FIXME */ + return 0; break; -#endif case TGSI_OPCODE_END: break; -#if 0 case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_sgn( bld, 0, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); } break; -#endif case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1013,34 +1102,29 @@ emit_instruction( src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0 = lp_build_select( &bld->base, tmp0, src1, src2); - emit_store( bld, inst, 0, chan_index, dst0); + dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); } break; case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_X, tmp0); + dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - emit_store( bld, inst, 0, CHAN_Y, tmp0); + dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { - tmp0 = bld->base.zero; - emit_store( bld, inst, 0, CHAN_Z, tmp0); + dst0[CHAN_Z] = bld->base.zero; } IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { - tmp0 = bld->base.one; - emit_store( bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1099,38 +1183,35 @@ emit_instruction( /* dst.x = xmm1 * src.x */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { - tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); - emit_store(bld, inst, 0, CHAN_X, tmp4); + dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); } /* dst.y = xmm1 * src.y */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { - tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); - emit_store(bld, inst, 0, CHAN_Y, tmp5); + dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); } /* dst.z = xmm1 * src.z */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { - tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); - emit_store(bld, inst, 0, CHAN_Z, tmp6); + dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); } /* dst.w = xmm1 * src.w */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { - tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); - emit_store(bld, inst, 0, CHAN_W, tmp7); + dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); } } - /* dst0.w = 1.0 */ + /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { - tmp0 = bld->base.one; - emit_store(bld, inst, 0, CHAN_W, tmp0); + dst0[CHAN_W] = bld->base.one; } } break; case TGSI_OPCODE_DIV: + /* deprecated */ + assert( 0 ); return 0; break; @@ -1143,118 +1224,157 @@ emit_instruction( tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ + dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ } break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE ); + emit_tex( bld, inst, TRUE, FALSE, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE ); + emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; case TGSI_OPCODE_BRK: + /* FIXME */ return 0; break; case TGSI_OPCODE_IF: + /* FIXME */ return 0; break; case TGSI_OPCODE_BGNFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_REP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ELSE: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDIF: + /* FIXME */ return 0; break; case TGSI_OPCODE_ENDFOR: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_ENDREP: + /* deprecated */ + assert(0); return 0; break; case TGSI_OPCODE_PUSHA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_POPA: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CEIL: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + tmp0 = emit_fetch( bld, inst, 0, chan_index ); + dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); + } break; case TGSI_OPCODE_I2F: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_NOT: + /* deprecated? */ + assert(0); return 0; break; -#if 0 case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_f2it( bld, 0 ); - emit_i2f( bld, 0 ); - emit_store( bld, inst, 0, chan_index, tmp0); + dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); } break; -#endif case TGSI_OPCODE_SHL: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SHR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_AND: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_OR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_MOD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_XOR: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_SAD: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXF: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_TXQ: + /* deprecated? */ + assert(0); return 0; break; case TGSI_OPCODE_CONT: + /* deprecated? */ + assert(0); return 0; break; @@ -1266,10 +1386,28 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + dst0[chan_index] = bld->base.zero; + } + break; + + case TGSI_OPCODE_NOP: + break; + default: return 0; } + if(info->num_dst) { + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + } + } + return 1; } @@ -1277,14 +1415,13 @@ emit_instruction( void lp_build_tgsi_soa(LLVMBuilderRef builder, const struct tgsi_token *tokens, - union lp_type type, + struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - lp_emit_fetch_texel_soa_callback emit_fetch_texel, - void *emit_fetch_texel_context) + struct lp_build_sampler_soa *sampler) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1299,8 +1436,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; - bld.emit_fetch_texel = emit_fetch_texel; - bld.emit_fetch_texel_context = emit_fetch_texel_context; + bld.sampler = sampler; tgsi_parse_init( &parse, tokens ); @@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* Input already interpolated */ + /* Inputs already interpolated */ break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) { + { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : "<invalid>"); - } + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + info ? info->mnemonic : "<invalid>"); + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 8e0026fd973..606243d6c5a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -33,7 +33,7 @@ LLVMTypeRef -lp_build_elem_type(union lp_type type) +lp_build_elem_type(struct lp_type type) { if (type.floating) { switch(type.width) { @@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type) LLVMTypeRef -lp_build_vec_type(union lp_type type) +lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); return LLVMVectorType(elem_type, type.length); @@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type) * type and check for identity. */ boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type) { LLVMTypeKind elem_kind; @@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) { LLVMTypeRef elem_type; @@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) boolean -lp_check_value(union lp_type type, LLVMValueRef val) +lp_check_value(struct lp_type type, LLVMValueRef val) { LLVMTypeRef vec_type; @@ -143,24 +143,36 @@ lp_check_value(union lp_type type, LLVMValueRef val) LLVMTypeRef -lp_build_int_elem_type(union lp_type type) +lp_build_int_elem_type(struct lp_type type) { return LLVMIntType(type.width); } LLVMTypeRef -lp_build_int_vec_type(union lp_type type) +lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); return LLVMVectorType(elem_type, type.length); } +struct lp_type +lp_int_type(struct lp_type type) +{ + struct lp_type int_type; + + memset(&int_type, 0, sizeof int_type); + int_type.width = type.width; + int_type.length = type.length; + return int_type; +} + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type) + struct lp_type type) { bld->builder = builder; bld->type = type; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 3ce566be641..ee5ca3483c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -56,58 +56,55 @@ * on the types used for intermediate computations, such as signed vs unsigned, * normalized values, or fixed point. */ -union lp_type { - struct { - /** - * Floating-point. Cannot be used with fixed. Integer numbers are - * represented by this zero. - */ - unsigned floating:1; - - /** - * Fixed-point. Cannot be used with floating. Integer numbers are - * represented by this zero. - */ - unsigned fixed:1; - - /** - * Whether it can represent negative values or not. - * - * If this is not set for floating point, it means that all values are - * assumed to be positive. - */ - unsigned sign:1; - - /** - * Whether values are normalized to fit [0, 1] interval, or [-1, 1] - * interval for signed types. - * - * For integer types it means the representable integer range should be - * interpreted as the interval above. - * - * For floating and fixed point formats it means the values should be - * clamped to the interval above. - */ - unsigned norm:1; - - /** - * Element width. - * - * For fixed point values, the fixed point is assumed to be at half the - * width. - */ - unsigned width:14; - - /** - * Vector length. - * - * width*length should be a power of two greater or equal to eight. - * - * @sa LP_MAX_VECTOR_LENGTH - */ - unsigned length:14; - }; - uint32_t value; +struct lp_type { + /** + * Floating-point. Cannot be used with fixed. Integer numbers are + * represented by this zero. + */ + unsigned floating:1; + + /** + * Fixed-point. Cannot be used with floating. Integer numbers are + * represented by this zero. + */ + unsigned fixed:1; + + /** + * Whether it can represent negative values or not. + * + * If this is not set for floating point, it means that all values are + * assumed to be positive. + */ + unsigned sign:1; + + /** + * Whether values are normalized to fit [0, 1] interval, or [-1, 1] + * interval for signed types. + * + * For integer types it means the representable integer range should be + * interpreted as the interval above. + * + * For floating and fixed point formats it means the values should be + * clamped to the interval above. + */ + unsigned norm:1; + + /** + * Element width. + * + * For fixed point values, the fixed point is assumed to be at half the + * width. + */ + unsigned width:14; + + /** + * Vector length. + * + * width*length should be a power of two greater or equal to eight. + * + * @sa LP_MAX_VECTOR_LENGTH + */ + unsigned length:14; }; @@ -124,7 +121,7 @@ struct lp_build_context * This not only describes the input/output LLVM types, but also whether * to normalize/clamp the results. */ - union lp_type type; + struct lp_type type; /** Same as lp_build_undef(type) */ LLVMValueRef undef; @@ -138,37 +135,41 @@ struct lp_build_context LLVMTypeRef -lp_build_elem_type(union lp_type type); +lp_build_elem_type(struct lp_type type); LLVMTypeRef -lp_build_vec_type(union lp_type type); +lp_build_vec_type(struct lp_type type); boolean -lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type); +lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type); boolean -lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); +lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type); boolean -lp_check_value(union lp_type type, LLVMValueRef val); +lp_check_value(struct lp_type type, LLVMValueRef val); LLVMTypeRef -lp_build_int_elem_type(union lp_type type); +lp_build_int_elem_type(struct lp_type type); LLVMTypeRef -lp_build_int_vec_type(union lp_type type); +lp_build_int_vec_type(struct lp_type type); + + +struct lp_type +lp_int_type(struct lp_type type); void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, - union lp_type type); + struct lp_type type); #endif /* !LP_BLD_TYPE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index 580cca5b463..bdcff94b9bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, util_pack_color(rgba, ps->format, &cv); lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); } + llvmpipe->dirty_render_cache = TRUE; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 233d1df0e10..a4b2bd8c2ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_REFERENCED_FOR_WRITE; } - /* FIXME: we also need to do the same for the texture cache */ - return PIPE_UNREFERENCED; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d288460a1b8..b4a22ff4a97 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -44,15 +44,47 @@ static void lp_jit_init_globals(struct llvmpipe_screen *screen) { - /* struct lp_jit_context */ + LLVMTypeRef texture_type; + + /* struct lp_jit_texture */ { LLVMTypeRef elem_types[4]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + screen->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + screen->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + screen->target, texture_type, + LP_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, + screen->target, texture_type, + LP_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + screen->target, texture_type); + + LLVMAddTypeName(screen->module, "texture", texture_type); + } + + /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, screen->target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + screen->target, context_type, + LP_JIT_CONTEXT_TEXTURES_INDEX); LP_CHECK_STRUCT_SIZE(struct lp_jit_context, screen->target, context_type); @@ -117,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { - fprintf(stderr, "%s\n", error); + _debug_printf("%s\n", error); LLVMDisposeMessage(error); abort(); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index a7fb60f9f5c..58f716ede29 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -38,11 +38,31 @@ #include "lp_bld_struct.h" +#include "pipe/p_state.h" + struct tgsi_sampler; struct llvmpipe_screen; +struct lp_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DATA +}; + + + /** * This structure is passed directly to the generated fragment shader. * @@ -60,11 +80,12 @@ struct lp_jit_context struct tgsi_sampler **samplers; - /* TODO: alpha reference value */ float alpha_ref_value; - /* TODO: blend constant color */ + /* FIXME: store (also?) in floats */ uint8_t *blend_color; + + struct lp_jit_texture textures[PIPE_MAX_SAMPLERS]; }; @@ -80,6 +101,11 @@ struct lp_jit_context #define lp_jit_context_blend_color(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 3, "blend_color") +#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 + +#define lp_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") + typedef void (*lp_jit_frag_func)(struct lp_jit_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 125035771e5..ff7ef8658a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,8 +27,6 @@ #include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -67,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -196,8 +192,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - - struct pipe_winsys *winsys = _screen->winsys; + struct llvmpipe_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index d145f6d6bbc..2d2fc19a65f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_bld_debug.h" #include "lp_tile_cache.h" #include "lp_tile_soa.h" @@ -162,12 +163,12 @@ shade_quads(struct llvmpipe_context *llvmpipe, else depth = NULL; - /* TODO: blend color */ + /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ + assert(lp_check_alignment(mask, 16)); - assert((((uintptr_t)mask) & 0xf) == 0); - assert((((uintptr_t)depth) & 0xf) == 0); - assert((((uintptr_t)color) & 0xf) == 0); - assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0); + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); /* run shader */ fs->current->jit_function( &llvmpipe->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index fb10329887d..7b26ce61a38 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -36,6 +36,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "lp_jit.h" +#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */ #define LP_NEW_VIEWPORT 0x1 @@ -57,16 +58,20 @@ struct tgsi_sampler; struct vertex_info; - +struct pipe_context; +struct llvmpipe_context; struct lp_fragment_shader; struct lp_fragment_shader_variant_key { + enum pipe_format zsbuf_format; struct pipe_depth_state depth; struct pipe_alpha_state alpha; struct pipe_blend_state blend; + + struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 6fbb057937e..30fb41ea65d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) vinfo->num_attribs = 0; for (i = 0; i < lpfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(llvmpipe->draw, @@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: @@ -250,7 +267,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) if (llvmpipe->dirty & (LP_NEW_FS | LP_NEW_BLEND | - LP_NEW_DEPTH_STENCIL_ALPHA)) + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_SAMPLER | + LP_NEW_TEXTURE)) llvmpipe_update_fs( llvmpipe ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 94170bd7161..9faed5a0b18 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -85,6 +85,7 @@ #include "lp_context.h" #include "lp_state.h" #include "lp_quad.h" +#include "lp_tex_sample.h" static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; @@ -130,21 +131,20 @@ generate_pos0(LLVMBuilderRef builder, * Generate the depth test. */ static void -generate_depth(struct llvmpipe_context *lp, - LLVMBuilderRef builder, - const struct pipe_depth_state *state, - union lp_type src_type, +generate_depth(LLVMBuilderRef builder, + const struct lp_fragment_shader_variant_key *key, + struct lp_type src_type, struct lp_build_mask_context *mask, LLVMValueRef src, LLVMValueRef dst_ptr) { const struct util_format_description *format_desc; - union lp_type dst_type; + struct lp_type dst_type; - if(!lp->framebuffer.zsbuf) + if(!key->depth.enabled) return; - format_desc = util_format_description(lp->framebuffer.zsbuf->format); + format_desc = util_format_description(key->zsbuf_format); assert(format_desc); /* Pick the depth type. */ @@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp, #endif lp_build_depth_test(builder, - state, + &key->depth, dst_type, format_desc, mask, @@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp, } -struct build_fetch_texel_context -{ - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -static void -emit_fetch_texel( LLVMBuilderRef builder, - void *context, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct build_fetch_texel_context *bld = context; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!bld->samplers_ptr) - bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr); - - if(!bld->store_ptr) - bld->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = bld->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = bld->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - /** * Generate the fragment shader, depth/stencil test, and alpha tests. */ @@ -282,11 +181,11 @@ generate_fs(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, unsigned i, const struct lp_build_interp_soa_context *interp, - struct build_fetch_texel_context *sampler, + struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef *color, LLVMValueRef depth_ptr) @@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z = interp->pos[2]; + struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_test; unsigned attrib; @@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - lp_build_mask_begin(&mask, builder, type, *pmask); + flow = lp_build_flow_create(builder); + + memset(outputs, 0, sizeof outputs); + + lp_build_flow_scope_begin(flow); + + /* Declare the color and z variables */ + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + color[chan] = LLVMGetUndef(vec_type); + lp_build_flow_scope_declare(flow, &color[chan]); + } + lp_build_flow_scope_declare(flow, &z); + + lp_build_mask_begin(&mask, flow, type, *pmask); early_depth_test = - lp->depth_stencil->depth.enabled && - lp->framebuffer.zsbuf && - !lp->depth_stencil->alpha.enabled && - !lp->fs->info.uses_kill && - !lp->fs->info.writes_z; + key->depth.enabled && + !key->alpha.enabled && + !shader->info.uses_kill && + !shader->info.writes_z; if(early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); - memset(outputs, 0, sizeof outputs); - lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, emit_fetch_texel, sampler); + outputs, sampler); for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp, } if(!early_depth_test) - generate_depth(lp, builder, &key->depth, + generate_depth(builder, key, type, &mask, z, depth_ptr); lp_build_mask_end(&mask); + lp_build_flow_scope_end(flow); + + lp_build_flow_destroy(flow); + *pmask = mask.value; } @@ -385,13 +299,15 @@ generate_fs(struct llvmpipe_context *lp, static void generate_blend(const struct pipe_blend_state *blend, LLVMBuilderRef builder, - union lp_type type, + struct lp_type type, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, LLVMValueRef dst_ptr) { struct lp_build_context bld; + struct lp_build_flow_context *flow; + struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMTypeRef int_vec_type; LLVMValueRef const_ptr; @@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef res[4]; unsigned chan; + lp_build_context_init(&bld, builder, type); + + flow = lp_build_flow_create(builder); + lp_build_mask_begin(&mask_ctx, flow, type, mask); + vec_type = lp_build_vec_type(type); int_vec_type = lp_build_int_vec_type(type); - lp_build_context_init(&bld, builder, type); - const_ptr = lp_jit_context_blend_color(builder, context_ptr); const_ptr = LLVMBuildBitCast(builder, const_ptr, LLVMPointerType(vec_type, 0), ""); @@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_blend_soa(builder, blend, type, src, dst, con, res); for(chan = 0; chan < 4; ++chan) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); - lp_build_name(res[chan], "res.%c", "rgba"[chan]); - res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); - LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + if(blend->colormask & (1 << chan)) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0); + lp_build_name(res[chan], "res.%c", "rgba"[chan]); + res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]); + LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, "")); + } } + + lp_build_mask_end(&mask_ctx); + lp_build_flow_destroy(flow); } @@ -440,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp, { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; - union lp_type fs_type; - union lp_type blend_type; + struct lp_type fs_type; + struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; @@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; - struct build_fetch_texel_context sampler; + struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; @@ -507,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp, /* TODO: actually pick these based on the fs and color buffer * characteristics. */ - fs_type.value = 0; + memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ @@ -515,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp, fs_type.length = 4; /* 4 element per vector */ num_fs = 4; - blend_type.value = 0; + memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ @@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); - memset(&sampler, 0, sizeof sampler); - sampler.context_ptr = context_ptr; +#if 0 + /* C texture sampling */ + sampler = lp_c_sampler_soa_create(context_ptr); +#else + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); +#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp, context_ptr, i, &interp, - &sampler, + sampler, &fs_mask[i], out_color, depth_ptr_i); @@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp, fs_out_color[chan][i] = out_color[chan]; } + sampler->destroy(sampler); + /* * Convert the fs's output color and mask to fit to the blending type. */ @@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, */ static void make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, struct lp_fragment_shader_variant_key *key) { + unsigned i; + memset(key, 0, sizeof *key); - memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + if(lp->framebuffer.zsbuf && + lp->depth_stencil->depth.enabled) { + key->zsbuf_format = lp->framebuffer.zsbuf->format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } key->alpha.enabled = lp->depth_stencil->alpha.enabled; if(key->alpha.enabled) key->alpha.func = lp->depth_stencil->alpha.func; /* alpha.ref_value is passed in jit_context */ - memcpy(&key->blend, lp->blend, sizeof key->blend); + if(lp->framebuffer.cbufs[0]) { + const struct util_format_description *format_desc; + unsigned chan; + + memcpy(&key->blend, lp->blend, sizeof key->blend); + + format_desc = util_format_description(lp->framebuffer.cbufs[0]->format); + assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB); + + /* mask out color channels not present in the color buffer */ + for(chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + if(swizzle > 4) + key->blend.colormask &= ~(1 << chan); + } + } + + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) + if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) + lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]); } @@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant; - make_variant_key(lp, &key); + make_variant_key(lp, shader, &key); variant = shader->variants; while(variant) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 4fef541b1e3..c69d90c723a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference(&llvmpipe->texture[i], tex); lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex); + + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; + jit_tex->width = tex->width[0]; + jit_tex->height = tex->height[0]; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + } } llvmpipe->num_textures = num; diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 69aaae26e0a..a88e110c663 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -86,43 +86,43 @@ random_float(void); void -dump_type(FILE *fp, union lp_type type); +dump_type(FILE *fp, struct lp_type type); double -read_elem(union lp_type type, const void *src, unsigned index); +read_elem(struct lp_type type, const void *src, unsigned index); void -write_elem(union lp_type type, void *dst, unsigned index, double src); +write_elem(struct lp_type type, void *dst, unsigned index, double src); void -random_elem(union lp_type type, void *dst, unsigned index); +random_elem(struct lp_type type, void *dst, unsigned index); void -read_vec(union lp_type type, const void *src, double *dst); +read_vec(struct lp_type type, const void *src, double *dst); void -write_vec(union lp_type type, void *dst, const double *src); +write_vec(struct lp_type type, void *dst, const double *src); void -random_vec(union lp_type type, void *dst); +random_vec(struct lp_type type, void *dst); boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps); +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps); boolean -compare_vec(union lp_type type, const void *res, const void *ref); +compare_vec(struct lp_type type, const void *res, const void *ref); void -dump_vec(FILE *fp, union lp_type type, const void *src); +dump_vec(FILE *fp, struct lp_type type, const void *src); #endif /* !LP_TEST_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 8dfad468e3c..94b661dcba4 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -80,7 +80,7 @@ static void write_tsv_row(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type, + struct lp_type type, double cycles, boolean success) { @@ -125,7 +125,7 @@ static void dump_blend_type(FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s", mode ? "soa" : "aos"); @@ -153,7 +153,7 @@ static LLVMValueRef add_blend_test(LLVMModuleRef module, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMTypeRef ret_type; LLVMTypeRef vec_type; @@ -467,7 +467,7 @@ test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, - union lp_type type) + struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -765,10 +765,10 @@ blend_funcs[] = { }; -const union lp_type blend_types[] = { +const struct lp_type blend_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */ - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */ + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */ + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ }; @@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; bool success = TRUE; for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { @@ -841,27 +841,27 @@ test_some(unsigned verbose, FILE *fp, unsigned long n) const unsigned *alpha_dst_factor; struct pipe_blend_state blend; enum vector_mode mode; - const union lp_type *type; + const struct lp_type *type; unsigned long i; bool success = TRUE; for(i = 0; i < n; ++i) { - rgb_func = &blend_funcs[random() % num_funcs]; - alpha_func = &blend_funcs[random() % num_funcs]; - rgb_src_factor = &blend_factors[random() % num_factors]; - alpha_src_factor = &blend_factors[random() % num_factors]; + rgb_func = &blend_funcs[rand() % num_funcs]; + alpha_func = &blend_funcs[rand() % num_funcs]; + rgb_src_factor = &blend_factors[rand() % num_factors]; + alpha_src_factor = &blend_factors[rand() % num_factors]; do { - rgb_dst_factor = &blend_factors[random() % num_factors]; + rgb_dst_factor = &blend_factors[rand() % num_factors]; } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); do { - alpha_dst_factor = &blend_factors[random() % num_factors]; + alpha_dst_factor = &blend_factors[rand() % num_factors]; } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); - mode = random() & 1; + mode = rand() & 1; - type = &blend_types[random() % num_types]; + type = &blend_types[rand() % num_types]; memset(&blend, 0, sizeof blend); blend.blend_enable = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index e6489834af5..9dcf58e5dcd 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -59,8 +59,8 @@ write_tsv_header(FILE *fp) static void write_tsv_row(FILE *fp, - union lp_type src_type, - union lp_type dst_type, + struct lp_type src_type, + struct lp_type dst_type, double cycles, boolean success) { @@ -80,8 +80,8 @@ write_tsv_row(FILE *fp, static void dump_conv_types(FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { fprintf(fp, "src_type="); dump_type(fp, src_type); @@ -96,8 +96,8 @@ dump_conv_types(FILE *fp, static LLVMValueRef add_conv_test(LLVMModuleRef module, - union lp_type src_type, unsigned num_srcs, - union lp_type dst_type, unsigned num_dsts) + struct lp_type src_type, unsigned num_srcs, + struct lp_type dst_type, unsigned num_dsts) { LLVMTypeRef args[2]; LLVMValueRef func; @@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module, static boolean test_one(unsigned verbose, FILE *fp, - union lp_type src_type, - union lp_type dst_type) + struct lp_type src_type, + struct lp_type dst_type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; @@ -343,35 +343,35 @@ test_one(unsigned verbose, } -const union lp_type conv_types[] = { +const struct lp_type conv_types[] = { /* float, fixed, sign, norm, width, len */ - {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }}, + { TRUE, FALSE, TRUE, TRUE, 32, 4 }, + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, + { TRUE, FALSE, FALSE, FALSE, 32, 4 }, /* TODO: test fixed formats too */ - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }}, - {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, - {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, - - {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, - {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }}, + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 32, 4 }, + { FALSE, FALSE, TRUE, FALSE, 32, 4 }, + { FALSE, FALSE, FALSE, TRUE, 32, 4 }, + { FALSE, FALSE, FALSE, FALSE, 32, 4 }, + + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 16 }, + { FALSE, FALSE, TRUE, FALSE, 8, 16 }, + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, + { FALSE, FALSE, FALSE, FALSE, 8, 16 }, }; @@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); boolean test_all(unsigned verbose, FILE *fp) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; bool success = TRUE; for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { @@ -407,16 +407,16 @@ test_all(unsigned verbose, FILE *fp) boolean test_some(unsigned verbose, FILE *fp, unsigned long n) { - const union lp_type *src_type; - const union lp_type *dst_type; + const struct lp_type *src_type; + const struct lp_type *dst_type; unsigned long i; bool success = TRUE; for(i = 0; i < n; ++i) { - src_type = &conv_types[random() % num_types]; + src_type = &conv_types[rand() % num_types]; do { - dst_type = &conv_types[random() % num_types]; + dst_type = &conv_types[rand() % num_types]; } while (src_type == dst_type || src_type->norm != dst_type->norm); if(!test_one(verbose, fp, *src_type, *dst_type)) diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 1d192355eed..d8455e56490 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module, lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba(builder, format, ptr); + rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); @@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba(builder, format, ptr, rgba); + lp_build_store_rgba_aos(builder, format, ptr, rgba); LLVMBuildRetVoid(builder); diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 49213fb4f0b..4592dc0b2d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -40,7 +40,7 @@ void dump_type(FILE *fp, - union lp_type type) + struct lp_type type) { fprintf(fp, "%s%s%u%sx%u", type.sign ? (type.floating || type.fixed ? "" : "s") : "u", @@ -52,7 +52,7 @@ dump_type(FILE *fp, double -read_elem(union lp_type type, const void *src, unsigned index) +read_elem(struct lp_type type, const void *src, unsigned index) { double scale = lp_const_scale(type); double value; @@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index) void -write_elem(union lp_type type, void *dst, unsigned index, double value) +write_elem(struct lp_type type, void *dst, unsigned index, double value) { assert(index < type.length); if(!type.sign && value < 0.0) @@ -184,11 +184,11 @@ write_elem(union lp_type type, void *dst, unsigned index, double value) void -random_elem(union lp_type type, void *dst, unsigned index) +random_elem(struct lp_type type, void *dst, unsigned index) { double value; assert(index < type.length); - value = (double)random()/(double)RAND_MAX; + value = (double)rand()/(double)RAND_MAX; if(!type.norm) { unsigned long long mask; if (type.floating) @@ -199,17 +199,17 @@ random_elem(union lp_type type, void *dst, unsigned index) mask = ((unsigned long long)1 << (type.width - 1)) - 1; else mask = ((unsigned long long)1 << type.width) - 1; - value += (double)(mask & random()); + value += (double)(mask & rand()); } if(!type.sign) - if(random() & 1) + if(rand() & 1) value = -value; write_elem(type, dst, index, value); } void -read_vec(union lp_type type, const void *src, double *dst) +read_vec(struct lp_type type, const void *src, double *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst) void -write_vec(union lp_type type, void *dst, const double *src) +write_vec(struct lp_type type, void *dst, const double *src) { unsigned i; for (i = 0; i < type.length; ++i) @@ -229,12 +229,12 @@ write_vec(union lp_type type, void *dst, const double *src) float random_float(void) { - return (float)((double)random()/(double)RAND_MAX); + return (float)((double)rand()/(double)RAND_MAX); } void -random_vec(union lp_type type, void *dst) +random_vec(struct lp_type type, void *dst) { unsigned i; for (i = 0; i < type.length; ++i) @@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst) boolean -compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps) +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) { unsigned i; for (i = 0; i < type.length; ++i) { @@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl boolean -compare_vec(union lp_type type, const void *res, const void *ref) +compare_vec(struct lp_type type, const void *res, const void *ref) { double eps = lp_const_eps(type); return compare_vec_with_eps(type, res, ref, eps); @@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref) void -dump_vec(FILE *fp, union lp_type type, const void *src) +dump_vec(FILE *fp, struct lp_type type, const void *src) { unsigned i; for (i = 0; i < type.length; ++i) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 23a94b5b0d5..773e8482425 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); + debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 628ec3f1efd..9ad1bde9565 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,10 +29,13 @@ #define LP_TEX_SAMPLE_H +#include <llvm-c/Core.h> + #include "tgsi/tgsi_exec.h" struct llvmpipe_tex_tile_cache; +struct lp_sampler_static_state; /** @@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]); +/** + * Texture sampling code generator that just calls lp_get_samples C function + * for the actual sampling computation. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr); + + +/** + * Pure-LLVM texture sampling code generator. + * + * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. + */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key, + LLVMValueRef context_ptr); + + #endif /* LP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 94eb6dad5af..a1365a045f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1578,3 +1578,136 @@ out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } + +void PIPE_CDECL +lp_fetch_texel_soa( struct tgsi_sampler **samplers, + uint32_t unit, + float *store ) +{ + struct tgsi_sampler *sampler = samplers[unit]; + +#if 0 + uint j; + + debug_printf("%s sampler: %p (%p) store: %p\n", + __FUNCTION__, + sampler, *sampler, + store ); + + debug_printf("lodbias %f\n", store[12]); + + for (j = 0; j < 4; j++) + debug_printf("sample %d texcoord %f %f\n", + j, + store[0+j], + store[4+j]); +#endif + + { + float rgba[NUM_CHANNELS][QUAD_SIZE]; + sampler->get_samples(sampler, + &store[0], + &store[4], + &store[8], + 0.0f, /*store[12], lodbias */ + rgba); + memcpy(store, rgba, sizeof rgba); + } + +#if 0 + for (j = 0; j < 4; j++) + debug_printf("sample %d result %f %f %f %f\n", + j, + store[0+j], + store[4+j], + store[8+j], + store[12+j]); +#endif +} + + +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_tgsi.h" + + +struct lp_c_sampler_soa +{ + struct lp_build_sampler_soa base; + + LLVMValueRef context_ptr; + + LLVMValueRef samplers_ptr; + + /** Coords/texels store */ + LLVMValueRef store_ptr; +}; + + +static void +lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; + LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); + LLVMValueRef args[3]; + unsigned i; + + if(!sampler->samplers_ptr) + sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); + + if(!sampler->store_ptr) + sampler->store_ptr = LLVMBuildArrayAlloca(builder, + vec_type, + LLVMConstInt(LLVMInt32Type(), 4, 0), + "texel_store"); + + for (i = 0; i < num_coords; i++) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + LLVMBuildStore(builder, coords[i], coord_ptr); + } + + args[0] = sampler->samplers_ptr; + args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); + args[2] = sampler->store_ptr; + + lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); + texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); + } +} + + +struct lp_build_sampler_soa * +lp_c_sampler_soa_create(LLVMValueRef context_ptr) +{ + struct lp_c_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_c_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_c_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; + sampler->context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c new file mode 100644 index 00000000000..d2a6ae21f57 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. + * + * @author Jose Fonseca <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "lp_bld_debug.h" +#include "lp_bld_type.h" +#include "lp_bld_intr.h" +#include "lp_bld_sample.h" +#include "lp_bld_tgsi.h" +#include "lp_state.h" +#include "lp_tex_sample.h" + + +/** + * This provides the bridge between the sampler state store in lp_jit_context + * and lp_jit_texture and the sampler code generator. It provides the + * texture layout information required by the texture sampler code generator + * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; + + LLVMValueRef context_ptr; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, + LLVMBuilderRef builder, + unsigned unit, + unsigned member_index, + const char *member_name) +{ + struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + /* context[0].textures */ + indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0); + /* context[0].textures[unit] */ + indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0); + /* context[0].textures[unit].member */ + indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0); + + ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); + + res = LLVMBuildLoad(builder, ptr, ""); + + lp_build_name(res, "context.texture%u.%s", unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to fetch + * the members of lp_jit_texture to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture sampler code + * generator a reusable module without dependencies to llvmpipe internals. + */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ + LLVMBuilderRef builder, \ + unsigned unit) \ + { \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) +LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +static void +lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, + LLVMBuilderRef builder, + struct lp_type type, + unsigned unit, + unsigned num_coords, + const LLVMValueRef *coords, + LLVMValueRef lodbias, + LLVMValueRef *texel) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(unit < PIPE_MAX_SAMPLERS); + + lp_build_sample_soa(builder, + &sampler->dynamic_state.static_state[unit], + &sampler->dynamic_state.base, + type, + unit, + num_coords, + coords, + lodbias, + texel); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, + LLVMValueRef context_ptr) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; + sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.context_ptr = context_ptr; + + return &sampler->base; +} + diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 143afec3d35..2e576e6039d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -225,11 +225,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) tc->clear_val); screen->transfer_unmap(screen, pt); + + tile->status = LP_TILE_STATUS_UNDEFINED; break; } case LP_TILE_STATUS_DEFINED: lp_put_tile_rgba_soa(pt, x, y, tile->color); + tile->status = LP_TILE_STATUS_UNDEFINED; break; } } @@ -257,7 +260,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, case LP_TILE_STATUS_UNDEFINED: /* get new tile data from transfer */ - lp_get_tile_rgba_soa(pt, x, y, tile->color); + lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color); tile->status = LP_TILE_STATUS_DEFINED; break; diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index ff2febb668e..170ce3eb7e5 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -16,8 +16,6 @@ nv04_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4469b22d91a..ee5901e743e 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -15,8 +15,6 @@ nv10_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index e6924ad71eb..4eeacd1afd5 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -15,8 +15,6 @@ nv20_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index f8285e4455f..41af38450b5 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -22,8 +22,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 5d2a4216c5a..bd13dfddd1c 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -21,8 +21,6 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 6e8f4f9750d..fca078b174a 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -37,11 +37,12 @@ nv50_flush(struct pipe_context *pipe, unsigned flags, /* We need this in the ddx for reliable composite, not sure what we're * actually flushing. We generate all our own flushes with flags = 0. */ - WAIT_RING(chan, 3); + WAIT_RING(chan, 2); BEGIN_RING(chan, eng2d, 0x0110, 1); OUT_RING (chan, 0); - FIRE_RING(chan); + if (flags & PIPE_FLUSH_FRAME) + FIRE_RING(chan); } static void @@ -110,6 +111,9 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.is_texture_referenced = nv50_is_texture_referenced; nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced; + screen->base.channel->user_private = nv50; + screen->base.channel->flush_notify = nv50_state_flush_notify; + nv50_init_surface_functions(nv50); nv50_init_state_functions(nv50); nv50_init_query_functions(nv50); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1e9e8e49bfb..4608854d711 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -116,6 +116,7 @@ struct nv50_state { unsigned miptree_nr; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *programs; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; @@ -190,10 +191,12 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); /* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); +extern void nv50_state_flush_notify(struct nouveau_channel *chan); /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 03b9243b828..93479a0314a 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->image_nr = 1; mt->level[0].pitch = *stride; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + mt->level[0].tile_mode = bo->tile_mode; nouveau_bo_ref(bo, &mt->base.bo); return &mt->base.base; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4a838529de7..eb90d5e66f9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -112,6 +112,10 @@ struct nv50_pc { struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; + /* broadcast and destination replacement regs */ + struct nv50_reg *r_brdc; + struct nv50_reg *r_dst[4]; + unsigned interp_mode[32]; /* perspective interpolation registers */ struct nv50_reg *iv_p; @@ -124,6 +128,25 @@ struct nv50_pc { boolean allow32; }; +static INLINE void +ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) +{ + reg->type = type; + reg->index = index; + reg->hw = hw; + reg->neg = 0; + reg->rhw = -1; + reg->acc = 0; +} + +static INLINE unsigned +popcnt4(uint32_t val) +{ + static const unsigned cnt[16] + = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + return cnt[val & 0xf]; +} + static void alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { @@ -184,11 +207,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) for (i = 0; i < NV50_SU_MAX_TEMP; i++) { if (!pc->r_temp[i]) { - r = CALLOC_STRUCT(nv50_reg); - r->type = P_TEMP; - r->index = -1; - r->hw = i; - r->rhw = -1; + r = MALLOC_STRUCT(nv50_reg); + ctor_reg(r, P_TEMP, -1, i); pc->r_temp[i] = r; return r; } @@ -254,10 +274,8 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) return alloc_temp4(pc, dst, idx + 4); for (i = 0; i < 4; i++) { - dst[i] = CALLOC_STRUCT(nv50_reg); - dst[i]->type = P_TEMP; - dst[i]->index = -1; - dst[i]->hw = idx + i; + dst[i] = MALLOC_STRUCT(nv50_reg); + ctor_reg(dst[i], P_TEMP, -1, idx + i); pc->r_temp[idx + i] = dst[i]; } @@ -309,7 +327,7 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) static struct nv50_reg * alloc_immd(struct nv50_pc *pc, float f) { - struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + struct nv50_reg *r = MALLOC_STRUCT(nv50_reg); unsigned hw; for (hw = 0; hw < pc->immd_nr * 4; hw++) @@ -319,9 +337,7 @@ alloc_immd(struct nv50_pc *pc, float f) if (hw == pc->immd_nr * 4) hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; - r->type = P_IMMD; - r->hw = hw; - r->index = -1; + ctor_reg(r, P_IMMD, -1, hw); return r; } @@ -786,6 +802,9 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVTOP_SAT 0x08 #define CVTOP_ABS 0x10 +/* 0x04 == 32 bit */ +/* 0x40 == dst is float */ +/* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 #define CVT_F32_U32 0x64 @@ -795,7 +814,7 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cop, unsigned fmt) + int wp, unsigned cvn, unsigned fmt) { struct nv50_program_exec *e; @@ -804,7 +823,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= 0xa0000000; e->inst[1] |= 0x00004000; - e->inst[1] |= (cop << 16); + e->inst[1] |= (cvn << 16); e->inst[1] |= (fmt << 24); set_src_0(pc, src, e); @@ -821,49 +840,80 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +/* nv50 Condition codes: + * 0x1 = LT + * 0x2 = EQ + * 0x3 = LE + * 0x4 = GT + * 0x5 = NE + * 0x6 = GE + * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) + * 0x8 = unordered bit (allows NaN) + */ static void -emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, +emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, struct nv50_reg *src0, struct nv50_reg *src1) { + static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + struct nv50_program_exec *e = exec(pc); - unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; - assert(c_op <= 7); + assert(ccode < 16); if (check_swap_src_0_1(pc, &src0, &src1)) - c_op = inv_cop[c_op]; + ccode = cc_swapped[ccode & 7] | (ccode & 8); rdst = dst; - if (dst->type != P_TEMP) + if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); /* set.u32 */ set_long(pc, e); e->inst[0] |= 0xb0000000; - e->inst[1] |= (3 << 29); - e->inst[1] |= (c_op << 14); - /*XXX: breaks things, .u32 by default? - * decuda will disasm as .u16 and use .lo/.hi regs, but this - * doesn't seem to match what the hw actually does. - inst[1] |= 0x04000000; << breaks things.. .u32 by default? + e->inst[1] |= 0x60000000 | (ccode << 14); + + /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but + * that doesn't seem to match what the hw actually does + e->inst[1] |= 0x04000000; << breaks things, u32 by default ? */ - set_dst(pc, dst, e); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + set_src_0(pc, src0, e); set_src_1(pc, src1, e); - emit(pc, e); - /* cvt.f32.u32 */ - e = exec(pc); - e->inst[0] = 0xa0000001; - e->inst[1] = 0x64014780; - set_dst(pc, rdst, e); - set_src_0(pc, dst, e); emit(pc, e); - if (dst != rdst) + /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ + if (rdst) + emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && rdst != dst) free_temp(pc, dst); } +static INLINE unsigned +map_tgsi_setop_cc(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_SLT: return 0x1; + case TGSI_OPCODE_SGE: return 0x6; + case TGSI_OPCODE_SEQ: return 0x2; + case TGSI_OPCODE_SGT: return 0x4; + case TGSI_OPCODE_SLE: return 0x3; + case TGSI_OPCODE_SNE: return 0xd; + default: + assert(0); + return 0; + } +} + static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -890,6 +940,12 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); } +static INLINE void +emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); +} + static void emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, struct nv50_reg **src) @@ -1159,6 +1215,70 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) } } +/* Return a read mask for source registers deduced from opcode & write mask. */ +static unsigned +nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) +{ + unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (c ? 0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_ext_texture *tex; + + assert(insn->Instruction.Extended); + tex = &insn->InstructionExtTexture; + + mask = 0x7; + if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) + mask |= 0x8; + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_2D: + mask &= 0xb; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + default: + break; + } + + return mask; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -1258,93 +1378,126 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, return r; } -/* returns TRUE if instruction can overwrite sources before they're read */ +/* return TRUE for ops that produce only a single result */ static boolean -direct2dest_op(const struct tgsi_full_instruction *insn) +is_scalar_op(unsigned op) { - if (insn->Instruction.Saturate) - return FALSE; - - switch (insn->Instruction.Opcode) { + switch (op) { case TGSI_OPCODE_COS: + case TGSI_OPCODE_DP2: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: - case TGSI_OPCODE_KIL: - case TGSI_OPCODE_LIT: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: case TGSI_OPCODE_SIN: + /* + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_SCS: + */ + return TRUE; + default: + return FALSE; + } +} + +/* Returns a bitmask indicating which dst components depend + * on source s, component c (reverse of nv50_tgsi_src_mask). + */ +static unsigned +nv50_tgsi_dst_revdep(unsigned op, int s, int c) +{ + if (is_scalar_op(op)) + return 0x1; + + switch (op) { + case TGSI_OPCODE_DST: + return (1 << c) & (s ? 0xa : 0x6); + case TGSI_OPCODE_XPD: + switch (c) { + case 0: return 0x6; + case 1: return 0x5; + case 2: return 0x3; + case 3: return 0x0; + default: + assert(0); + return 0x0; + } + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - return FALSE; + /* these take care of dangerous swizzles themselves */ + return 0x0; + case TGSI_OPCODE_IF: + case TGSI_OPCODE_KIL: + /* don't call this function for these ops */ + assert(0); + return 0; default: - return TRUE; + /* linear vector instruction */ + return (1 << c); } } static boolean -nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +nv50_program_tx_insn(struct nv50_pc *pc, + const struct tgsi_full_instruction *inst) { - const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp; unsigned mask, sat, unit; - boolean assimilate = FALSE; int i, c; mask = inst->FullDstRegisters[0].DstRegister.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + memset(src, 0, sizeof(src)); + for (c = 0; c < 4; c++) { - if (mask & (1 << c)) + if ((mask & (1 << c)) && !pc->r_dst[c]) dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); else - dst[c] = NULL; - rdst[c] = NULL; - src[0][c] = NULL; - src[1][c] = NULL; - src[2][c] = NULL; + dst[c] = pc->r_dst[c]; + rdst[c] = dst[c]; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + unsigned src_mask; + boolean neg_supp; + + src_mask = nv50_tgsi_src_mask(inst, i); + neg_supp = negate_supported(inst, i); if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) unit = fs->SrcRegister.Index; for (c = 0; c < 4; c++) - src[i][c] = tgsi_src(pc, c, fs, - negate_supported(inst, i)); + if (src_mask & (1 << c)) + src[i][c] = tgsi_src(pc, c, fs, neg_supp); } - if (sat) { - for (c = 0; c < 4; c++) { - rdst[c] = dst[c]; - dst[c] = temp_temp(pc); - } + brdc = temp = pc->r_brdc; + if (brdc && brdc->type != P_TEMP) { + temp = temp_temp(pc); + if (sat) + brdc = temp; } else - if (direct2dest_op(inst)) { + if (sat) { for (c = 0; c < 4; c++) { - if (!dst[c] || dst[c]->type != P_TEMP) + if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; - - for (i = c + 1; i < 4; i++) { - if (dst[c] == src[0][i] || - dst[c] == src[1][i] || - dst[c] == src[2][i]) - break; - } - if (i == 4) - continue; - - assimilate = TRUE; rdst[c] = dst[c]; - dst[c] = alloc_temp(pc, NULL); + dst[c] = temp_temp(pc); } } + assert(brdc || !is_scalar_op(inst->Instruction.Opcode)); + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { @@ -1360,74 +1513,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_add(pc, dst[c], src[0][c], src[1][c]); } break; - case TGSI_OPCODE_COS: - temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, temp, temp); + case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_mov(pc, dst[c], temp); + emit_cvt(pc, dst[c], src[0][c], -1, + CVTOP_CEIL, CVT_F32_F32); + } + break; + case TGSI_OPCODE_COS: + if (mask & 8) { + emit_precossin(pc, temp, src[0][3]); + emit_flop(pc, 5, dst[3], temp); + if (!(mask &= 7)) + break; + if (temp == dst[3]) + temp = brdc = temp_temp(pc); } + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, brdc, temp); break; case TGSI_OPCODE_DP3: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); - emit_mad(pc, temp, src[0][2], src[1][2], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_mad(pc, brdc, src[0][2], src[1][2], temp); break; case TGSI_OPCODE_DP4: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - emit_mad(pc, temp, src[0][3], src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_mad(pc, brdc, src[0][3], src[1][3], temp); break; case TGSI_OPCODE_DPH: - temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - emit_add(pc, temp, src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_add(pc, brdc, src[1][3], temp); break; case TGSI_OPCODE_DST: - { - struct nv50_reg *one = alloc_immd(pc, 1.0); - if (mask & (1 << 0)) - emit_mov(pc, dst[0], one); if (mask & (1 << 1)) emit_mul(pc, dst[1], src[0][1], src[1][1]); if (mask & (1 << 2)) emit_mov(pc, dst[2], src[0][2]); if (mask & (1 << 3)) emit_mov(pc, dst[3], src[1][3]); - FREE(one); - } + if (mask & (1 << 0)) + emit_mov_immdval(pc, dst[0], 1.0f); break; case TGSI_OPCODE_EX2: - temp = temp_temp(pc); emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_flop(pc, 6, brdc, temp); break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -1450,19 +1585,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_kil(pc, src[0][1]); emit_kil(pc, src[0][2]); emit_kil(pc, src[0][3]); - pc->p->cfg.fp.regs[2] |= 0x00100000; break; case TGSI_OPCODE_LIT: emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - temp = temp_temp(pc); - emit_flop(pc, 3, temp, src[0][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_flop(pc, 3, brdc, src[0][0]); break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -1510,31 +1638,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_POW: - temp = temp_temp(pc); - emit_pow(pc, temp, src[0][0], src[1][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } + emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - for (c = 3; c >= 0; c--) { - if (!(mask & (1 << c))) - continue; - emit_flop(pc, 0, dst[c], src[0][0]); - } + emit_flop(pc, 0, brdc, src[0][0]); break; case TGSI_OPCODE_RSQ: - for (c = 3; c >= 0; c--) { - if (!(mask & (1 << c))) - continue; - emit_flop(pc, 2, dst[c], src[0][0]); - } + emit_flop(pc, 2, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); + if (mask & 3) + emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) emit_flop(pc, 5, dst[0], temp); if (mask & (1 << 1)) @@ -1544,28 +1659,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; - case TGSI_OPCODE_SGE: - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_set(pc, 6, dst[c], src[0][c], src[1][c]); - } - break; case TGSI_OPCODE_SIN: - temp = temp_temp(pc); - emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); + if (mask & 8) { + emit_precossin(pc, temp, src[0][3]); + emit_flop(pc, 4, dst[3], temp); + if (!(mask &= 7)) + break; + if (temp == dst[3]) + temp = brdc = temp_temp(pc); } + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, brdc, temp); break; case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + i = map_tgsi_setop_cc(inst->Instruction.Opcode); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); } break; case TGSI_OPCODE_SUB: @@ -1583,6 +1699,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_tex(pc, dst, mask, src[0], unit, inst->InstructionExtTexture.Texture, TRUE); break; + case TGSI_OPCODE_TRUNC: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVTOP_TRUNC, CVT_F32_F32); + } + break; case TGSI_OPCODE_XPD: temp = temp_temp(pc); if (mask & (1 << 0)) { @@ -1607,17 +1731,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + if (brdc) { + if (sat) + emit_sat(pc, brdc, brdc); + for (c = 0; c < 4; c++) + if ((mask & (1 << c)) && dst[c] != brdc) + emit_mov(pc, dst[c], brdc); + } else if (sat) { for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, - CVT_F32_F32); + /* in this case we saturate later */ + if (dst[c]->type == P_TEMP && dst[c]->index < 0) + continue; + emit_sat(pc, rdst[c], dst[c]); } - } else if (assimilate) { - for (c = 0; c < 4; c++) - if (rdst[c]) - assimilate_temp(pc, rdst[c], dst[c]); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -1626,9 +1755,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); - else - if (src[i][c]->acc == pc->insn_cur) - release_hw(pc, src[i][c]); } } @@ -1636,180 +1762,271 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return TRUE; } -/* Adjust a bitmask that indicates what components of a source are used, - * we use this in tx_prep so we only load interpolants that are needed. - */ static void -insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) +prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - const struct tgsi_instruction_ext_texture *tex; - - switch (insn->Instruction.Opcode) { - case TGSI_OPCODE_DP3: - *mask = 0x7; - break; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - *mask = 0xF; - break; - case TGSI_OPCODE_LIT: - *mask = 0xB; - break; - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - *mask = 0x1; - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXP: - assert(insn->Instruction.Extended); - tex = &insn->InstructionExtTexture; - - *mask = 0x7; - if (tex->Texture == TGSI_TEXTURE_1D) - *mask = 0x1; - else - if (tex->Texture == TGSI_TEXTURE_2D) - *mask = 0x3; - - if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) - *mask |= 0x8; - break; - default: - break; - } -} - -static void -prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, - unsigned *r_usage[2]) -{ - const struct tgsi_full_instruction *insn; + struct nv50_reg *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; + unsigned i, c, k, mask; - unsigned i, c, k, n, mask, *acc_p; - - insn = &tok->FullInstruction; dst = &insn->FullDstRegisters[0].DstRegister; mask = dst->WriteMask; - if (!r_usage[0]) - r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); - if (!r_usage[1]) - r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); + if (dst->File == TGSI_FILE_TEMPORARY) + reg = pc->temp; + else + if (dst->File == TGSI_FILE_OUTPUT) + reg = pc->result; - if (dst->File == TGSI_FILE_TEMPORARY) { + if (reg) { for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - r_usage[0][dst->Index * 4 + c] = pc->insn_nr; + reg[dst->Index * 4 + c].acc = pc->insn_nr; } } for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { src = &insn->FullSrcRegisters[i]; - switch (src->SrcRegister.File) { - case TGSI_FILE_TEMPORARY: - acc_p = r_usage[0]; - break; - case TGSI_FILE_INPUT: - acc_p = r_usage[1]; - break; - default: + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) + reg = pc->temp; + else + if (src->SrcRegister.File == TGSI_FILE_INPUT) + reg = pc->attr; + else continue; - } - insn_adjust_mask(insn, &mask); + mask = nv50_tgsi_src_mask(insn, i); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - k = tgsi_util_get_full_src_register_extswizzle(src, c); - switch (k) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - n = src->SrcRegister.Index * 4 + k; - acc_p[n] = pc->insn_nr; - break; - default: - break; - } + + if (k > TGSI_EXTSWIZZLE_W) + continue; + + reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; } } } +/* Returns a bitmask indicating which dst components need to be + * written to temporaries first to avoid 'corrupting' sources. + * + * m[i] (out) indicate component to write in the i-th position + * rdep[c] (in) bitmasks of dst[i] that require dst[c] as source + */ static unsigned -load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, - int *aid, int *p_oid) +nv50_revdep_reorder(unsigned m[4], unsigned rdep[4]) { - struct nv50_reg *iv; - int oid, c, n; - unsigned mask = 0; + unsigned i, c, x, unsafe; - iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p; + for (c = 0; c < 4; c++) + m[c] = c; - for (c = 0, n = i * 4; c < 4; c++, n++) { - oid = (*p_oid)++; - pc->attr[n].type = P_TEMP; - pc->attr[n].index = i; + /* Swap as long as a dst component written earlier is depended on + * by one written later, but the next one isn't depended on by it. + */ + for (c = 0; c < 3; c++) { + if (rdep[m[c + 1]] & (1 << m[c])) + continue; /* if next one is depended on by us */ + for (i = c + 1; i < 4; i++) + /* if we are depended on by a later one */ + if (rdep[m[c]] & (1 << m[i])) + break; + if (i == 4) + continue; + /* now, swap */ + x = m[c]; + m[c] = m[c + 1]; + m[c + 1] = x; + + /* restart */ + c = 0; + } + + /* mark dependencies that could not be resolved by reordering */ + for (i = 0; i < 3; ++i) + for (c = i + 1; c < 4; ++c) + if (rdep[m[i]] & (1 << m[c])) + unsafe |= (1 << i); + + /* NOTE: $unsafe is with respect to order, not component */ + return unsafe; +} - if (pc->attr[n].acc == acc[n]) +/* Select a suitable dst register for broadcasting scalar results, + * or return NULL if we have to allocate an extra TEMP. + * + * If e.g. only 1 component is written, we may also emit the final + * result to a write-only register. + */ +static struct nv50_reg * +tgsi_broadcast_dst(struct nv50_pc *pc, + const struct tgsi_full_dst_register *fd, unsigned mask) +{ + if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { + int c = ffs(~mask & fd->DstRegister.WriteMask); + if (c) + return tgsi_dst(pc, c - 1, fd); + } else { + int c = ffs(fd->DstRegister.WriteMask) - 1; + if ((1 << c) == fd->DstRegister.WriteMask) + return tgsi_dst(pc, c, fd); + } + + return NULL; +} + +/* Scan source swizzles and return a bitmask indicating dst regs that + * also occur among the src regs, and fill rdep for nv50_revdep_reoder. + */ +static unsigned +nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, + unsigned rdep[4]) +{ + const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; + const struct tgsi_full_src_register *fs; + unsigned i, deqs = 0; + + for (i = 0; i < 4; ++i) + rdep[i] = 0; + + for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { + unsigned chn, mask = nv50_tgsi_src_mask(insn, i); + boolean neg_supp = negate_supported(insn, i); + + fs = &insn->FullSrcRegisters[i]; + if (fs->SrcRegister.File != fd->DstRegister.File || + fs->SrcRegister.Index != fd->DstRegister.Index) continue; - mask |= (1 << c); - pc->attr[n].acc = acc[n]; - pc->attr[n].rhw = pc->attr[n].hw = -1; - alloc_reg(pc, &pc->attr[n]); + for (chn = 0; chn < 4; ++chn) { + unsigned s, c; + + if (!(mask & (1 << chn))) /* src is not read */ + continue; + c = tgsi_util_get_full_src_register_extswizzle(fs, chn); + s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - pc->attr[n].rhw = (*aid)++; - emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); + if (c > TGSI_EXTSWIZZLE_W || + !(fd->DstRegister.WriteMask & (1 << c))) + continue; - pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); - (*mid)++; - pc->p->cfg.fp.regs[1] += 0x00010001; + /* no danger if src is copied to TEMP first */ + if ((s != TGSI_UTIL_SIGN_KEEP) && + (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) + continue; + + rdep[c] |= nv50_tgsi_dst_revdep( + insn->Instruction.Opcode, i, chn); + deqs |= (1 << c); + } } - return mask; + return deqs; } static boolean -nv50_program_tx_prep(struct nv50_pc *pc) +nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { - struct tgsi_parse_context p; - boolean ret = FALSE; - unsigned i, c; - unsigned fcol, bcol, fcrd, depr; + struct tgsi_full_instruction insn = tok->FullInstruction; + const struct tgsi_full_dst_register *fd; + unsigned i, deqs, rdep[4], m[4]; + + fd = &tok->FullInstruction.FullDstRegisters[0]; + deqs = nv50_tgsi_scan_swizzle(&insn, rdep); + + if (is_scalar_op(insn.Instruction.Opcode)) { + pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); + if (!pc->r_brdc) + pc->r_brdc = temp_temp(pc); + return nv50_program_tx_insn(pc, &insn); + } + pc->r_brdc = NULL; + + if (!deqs) + return nv50_program_tx_insn(pc, &insn); + + deqs = nv50_revdep_reorder(m, rdep); - /* count (centroid) perspective interpolations */ - unsigned centroid_loads = 0; - unsigned perspect_loads = 0; + for (i = 0; i < 4; ++i) { + assert(pc->r_dst[m[i]] == NULL); - /* track register access for temps and attrs */ - unsigned *r_usage[2]; - r_usage[0] = NULL; - r_usage[1] = NULL; + insn.FullDstRegisters[0].DstRegister.WriteMask = + fd->DstRegister.WriteMask & (1 << m[i]); - depr = fcol = bcol = fcrd = 0xffff; + if (!insn.FullDstRegisters[0].DstRegister.WriteMask) + continue; + + if (deqs & (1 << i)) + pc->r_dst[m[i]] = alloc_temp(pc, NULL); - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - pc->p->cfg.fp.regs[0] = 0x01000404; - pc->p->cfg.fp.regs[1] = 0x00000400; + if (!nv50_program_tx_insn(pc, &insn)) + return FALSE; } - tgsi_parse_init(&p, pc->p->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; + for (i = 0; i < 4; i++) { + struct nv50_reg *reg = pc->r_dst[i]; + if (!reg) + continue; + pc->r_dst[i] = NULL; + + if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE) + emit_sat(pc, tgsi_dst(pc, i, fd), reg); + else + emit_mov(pc, tgsi_dst(pc, i, fd), reg); + free_temp(pc, reg); + } - tgsi_parse_token(&p); + return TRUE; +} + +static void +load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *iv, **ppiv; + unsigned mode = pc->interp_mode[reg->index]; + + ppiv = (mode & INTERP_CENTROID) ? &pc->iv_c : &pc->iv_p; + iv = *ppiv; + + if ((mode & INTERP_PERSPECTIVE) && !iv) { + iv = *ppiv = alloc_temp(pc, NULL); + iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; + + emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); + emit_flop(pc, 0, iv, iv); + + /* XXX: when loading interpolants dynamically, move these + * to the program head, or make sure it can't be skipped. + */ + } + + emit_interp(pc, reg, iv, mode); +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context tp; + struct nv50_program *p = pc->p; + boolean ret = FALSE; + unsigned i, c, flat_nr = 0; + + tgsi_parse_init(&tp, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&tp)) { + const union tgsi_full_token *tok = &tp.FullToken; + + tgsi_parse_token(&tp); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: { const struct tgsi_full_immediate *imm = - &p.FullToken.FullImmediate; + &tp.FullToken.FullImmediate; ctor_immd(pc, imm->u[0].Float, imm->u[1].Float, @@ -1820,78 +2037,61 @@ nv50_program_tx_prep(struct nv50_pc *pc) case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *d; - unsigned last, first, mode; + unsigned si, last, first, mode; - d = &p.FullToken.FullDeclaration; + d = &tp.FullToken.FullDeclaration; first = d->DeclarationRange.First; last = d->DeclarationRange.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (pc->temp_nr < (last + 1)) - pc->temp_nr = last + 1; break; case TGSI_FILE_OUTPUT: - if (pc->result_nr < (last + 1)) - pc->result_nr = last + 1; - - if (!d->Declaration.Semantic) + if (!d->Declaration.Semantic || + p->type == PIPE_SHADER_FRAGMENT) break; + si = d->Semantic.SemanticIndex; switch (d->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - depr = first; - pc->p->cfg.fp.regs[2] |= 0x00000100; - pc->p->cfg.fp.regs[3] |= 0x00000011; + case TGSI_SEMANTIC_BCOLOR: + p->cfg.two_side[si].hw = first; + if (p->cfg.io_nr > first) + p->cfg.io_nr = first; + break; + case TGSI_SEMANTIC_PSIZE: + p->cfg.psiz = first; + if (p->cfg.io_nr > first) + p->cfg.io_nr = first; + break; + /* + case TGSI_SEMANTIC_CLIP_DISTANCE: + p->cfg.clpd = MIN2(p->cfg.clpd, first); break; + */ default: break; } - break; case TGSI_FILE_INPUT: { - if (pc->attr_nr < (last + 1)) - pc->attr_nr = last + 1; - - if (pc->p->type != PIPE_SHADER_FRAGMENT) + if (p->type != PIPE_SHADER_FRAGMENT) break; switch (d->Declaration.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: mode = INTERP_FLAT; + flat_nr++; break; case TGSI_INTERPOLATE_PERSPECTIVE: mode = INTERP_PERSPECTIVE; + p->cfg.regs[1] |= 0x08 << 24; break; default: mode = INTERP_LINEAR; break; } - - if (d->Declaration.Semantic) { - switch (d->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - fcrd = first; - break; - case TGSI_SEMANTIC_COLOR: - fcol = first; - mode = INTERP_PERSPECTIVE; - break; - case TGSI_SEMANTIC_BCOLOR: - bcol = first; - mode = INTERP_PERSPECTIVE; - break; - } - } - - if (d->Declaration.Centroid) { + if (d->Declaration.Centroid) mode |= INTERP_CENTROID; - if (mode & INTERP_PERSPECTIVE) - centroid_loads++; - } else - if (mode & INTERP_PERSPECTIVE) - perspect_loads++; assert(last < 32); for (i = first; i <= last; i++) @@ -1899,8 +2099,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } break; case TGSI_FILE_CONSTANT: - if (pc->param_nr < (last + 1)) - pc->param_nr = last + 1; break; case TGSI_FILE_SAMPLER: break; @@ -1913,182 +2111,155 @@ nv50_program_tx_prep(struct nv50_pc *pc) break; case TGSI_TOKEN_TYPE_INSTRUCTION: pc->insn_nr++; - prep_inspect_insn(pc, tok, r_usage); + prep_inspect_insn(pc, &tok->FullInstruction); break; default: break; } } - if (pc->temp_nr) { - pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); - if (!pc->temp) - goto out_err; + if (p->type == PIPE_SHADER_VERTEX) { + int rid = 0; - for (i = 0; i < pc->temp_nr; i++) { - for (c = 0; c < 4; c++) { - pc->temp[i*4+c].type = P_TEMP; - pc->temp[i*4+c].hw = -1; - pc->temp[i*4+c].rhw = -1; - pc->temp[i*4+c].index = i; - pc->temp[i*4+c].acc = r_usage[0][i*4+c]; + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); } } - } - - if (pc->attr_nr) { - int oid = 4, mid = 4, aid = 0; - /* oid = VP output id - * aid = FP attribute/interpolant id - * mid = VP output mapping field ID - */ - pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); - if (!pc->attr) - goto out_err; + for (i = 0, rid = 0; i < pc->result_nr; ++i) { + p->cfg.io[i].hw = rid; + p->cfg.io[i].id_vp = i; - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - /* position should be loaded first */ - if (fcrd != 0xffff) { - unsigned mask; - mid = 0; - mask = load_fp_attrib(pc, fcrd, r_usage[1], - &mid, &aid, &oid); - oid = 0; - pc->p->cfg.fp.regs[1] |= (mask << 24); - pc->p->cfg.fp.map[0] = 0x04040404 * fcrd; - } - pc->p->cfg.fp.map[0] += 0x03020100; - - /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */ - - if (perspect_loads) { - pc->iv_p = alloc_temp(pc, NULL); - - if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) { - pc->p->cfg.fp.regs[1] |= 0x08000000; - pc->iv_p->rhw = aid++; - emit_interp(pc, pc->iv_p, NULL, - INTERP_LINEAR); - emit_flop(pc, 0, pc->iv_p, pc->iv_p); - } else { - pc->iv_p->rhw = aid - 1; - emit_flop(pc, 0, pc->iv_p, - &pc->attr[fcrd * 4 + 3]); - } + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->result[n].acc) + continue; + pc->result[n].hw = rid++; + p->cfg.io[i].mask |= 1 << c; } + } - if (centroid_loads) { - pc->iv_c = alloc_temp(pc, NULL); - pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++; - emit_interp(pc, pc->iv_c, NULL, - INTERP_CENTROID); - emit_flop(pc, 0, pc->iv_c, pc->iv_c); - pc->p->cfg.fp.regs[1] |= 0x08000000; - } + for (c = 0; c < 2; ++c) + if (p->cfg.two_side[c].hw < 0x40) + p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c].hw]; - for (c = 0; c < 4; c++) { - /* I don't know what these values do, but - * let's set them like the blob does: - */ - if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - } - - for (i = 0; i < pc->attr_nr; i++) - load_fp_attrib(pc, i, r_usage[1], - &mid, &aid, &oid); + if (p->cfg.psiz < 0x40) + p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + } else + if (p->type == PIPE_SHADER_FRAGMENT) { + int rid, aid; + unsigned n = 0, m = pc->attr_nr - flat_nr; - if (pc->iv_p) - free_temp(pc, pc->iv_p); - if (pc->iv_c) - free_temp(pc, pc->iv_c); + int base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; - pc->p->cfg.fp.high_map = (mid / 4); - pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); - } else { - /* vertex program */ - for (i = 0; i < pc->attr_nr * 4; i++) { - pc->p->cfg.vp.attr[aid / 32] |= - (1 << (aid % 32)); - pc->attr[i].type = P_ATTR; - pc->attr[i].hw = aid++; - pc->attr[i].index = i / 4; + /* non-flat interpolants have to be mapped to + * the lower hardware IDs, so sort them: + */ + for (i = 0; i < pc->attr_nr; i++) { + if (pc->interp_mode[i] == INTERP_FLAT) { + p->cfg.io[m].id_vp = i + base; + p->cfg.io[m++].id_fp = i; + } else { + if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) + p->cfg.io[n].linear = TRUE; + p->cfg.io[n].id_vp = i + base; + p->cfg.io[n++].id_fp = i; } } - } - if (pc->result_nr) { - int rid = 0; + if (!base) /* set w-coordinate mask from perspective interp */ + p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; - pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); - if (!pc->result) - goto out_err; + aid = popcnt4( /* if fcrd isn't contained in cfg.io */ + base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); - for (i = 0; i < pc->result_nr; i++) { - for (c = 0; c < 4; c++) { - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - pc->result[i*4+c].type = P_TEMP; - pc->result[i*4+c].hw = -1; - pc->result[i*4+c].rhw = (i == depr) ? - -1 : rid++; - } else { - pc->result[i*4+c].type = P_RESULT; - pc->result[i*4+c].hw = rid++; - } - pc->result[i*4+c].index = i; - } + for (n = 0; n < pc->attr_nr; ++n) { + p->cfg.io[n].hw = rid = aid; + i = p->cfg.io[n].id_fp; + + for (c = 0; c < 4; ++c) { + if (!pc->attr[i * 4 + c].acc) + continue; + pc->attr[i * 4 + c].rhw = rid++; + p->cfg.io[n].mask |= 1 << c; - if (pc->p->type == PIPE_SHADER_FRAGMENT && - depr != 0xffff) { - pc->result[depr * 4 + 2].rhw = - (pc->result_nr - 1) * 4; + load_interpolant(pc, &pc->attr[i * 4 + c]); } + aid += popcnt4(p->cfg.io[n].mask); } - } - if (pc->param_nr) { - int rid = 0; + if (!base) + p->cfg.regs[1] |= p->cfg.io[0].mask << 24; - pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); - if (!pc->param) - goto out_err; + m = popcnt4(p->cfg.regs[1] >> 24); - for (i = 0; i < pc->param_nr; i++) { - for (c = 0; c < 4; c++) { - pc->param[i*4+c].type = P_CONST; - pc->param[i*4+c].hw = rid++; - pc->param[i*4+c].index = i; + /* set count of non-position inputs and of non-flat + * non-position inputs for FP_INTERPOLANT_CTRL + */ + p->cfg.regs[1] |= aid - m; + + if (flat_nr) { + i = p->cfg.io[pc->attr_nr - flat_nr].hw; + p->cfg.regs[1] |= (i - m) << 16; + } else + p->cfg.regs[1] |= p->cfg.regs[1] << 16; + + /* mark color semantic for light-twoside */ + n = 0x40; + for (i = 0; i < pc->attr_nr; i++) { + ubyte si, sn; + + sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; + si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + + if (sn == TGSI_SEMANTIC_COLOR) { + p->cfg.two_side[si] = p->cfg.io[i]; + + /* increase colour count */ + p->cfg.regs[0] += popcnt4( + p->cfg.two_side[si].mask) << 16; + + n = MIN2(n, p->cfg.io[i].hw - m); } } + if (n < 0x40) + p->cfg.regs[0] += n; + + /* Initialize FP results: + * FragDepth is always first TGSI and last hw output + */ + i = p->info.writes_z ? 4 : 0; + for (rid = 0; i < pc->result_nr * 4; i++) + pc->result[i].rhw = rid++; + if (p->info.writes_z) + pc->result[2].rhw = rid; } if (pc->immd_nr) { int rid = 0; - pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); + pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg)); if (!pc->immd) goto out_err; for (i = 0; i < pc->immd_nr; i++) { - for (c = 0; c < 4; c++) { - pc->immd[i*4+c].type = P_IMMD; - pc->immd[i*4+c].hw = rid++; - pc->immd[i*4+c].index = i; - } + for (c = 0; c < 4; c++, rid++) + ctor_reg(&pc->immd[rid], P_IMMD, i, rid); } } ret = TRUE; out_err: - if (r_usage[0]) - FREE(r_usage[0]); - if (r_usage[1]) - FREE(r_usage[1]); + if (pc->iv_p) + free_temp(pc, pc->iv_p); + if (pc->iv_c) + free_temp(pc, pc->iv_c); - tgsi_parse_free(&p); + tgsi_parse_free(&tp); return ret; } @@ -2110,6 +2281,88 @@ free_nv50_pc(struct nv50_pc *pc) } static boolean +ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) +{ + int i, c; + unsigned rtype[2] = { P_ATTR, P_RESULT }; + + pc->p = p; + pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1; + pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; + pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; + pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; + + p->cfg.high_temp = 4; + + p->cfg.two_side[0].hw = 0x40; + p->cfg.two_side[1].hw = 0x40; + + switch (p->type) { + case PIPE_SHADER_VERTEX: + p->cfg.psiz = 0x40; + p->cfg.clpd = 0x40; + p->cfg.io_nr = pc->result_nr; + break; + case PIPE_SHADER_FRAGMENT: + rtype[0] = rtype[1] = P_TEMP; + + p->cfg.regs[0] = 0x01000004; + p->cfg.io_nr = pc->attr_nr; + + if (p->info.writes_z) { + p->cfg.regs[2] |= 0x00000100; + p->cfg.regs[3] |= 0x00000011; + } + if (p->info.uses_kill) + p->cfg.regs[2] |= 0x00100000; + break; + } + + if (pc->temp_nr) { + pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->temp) + return FALSE; + + for (i = 0; i < pc->temp_nr * 4; ++i) + ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1); + } + + if (pc->attr_nr) { + pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->attr) + return FALSE; + + for (i = 0; i < pc->attr_nr * 4; ++i) + ctor_reg(&pc->attr[i], rtype[0], i / 4, -1); + } + + if (pc->result_nr) { + unsigned nr = pc->result_nr * 4; + + pc->result = MALLOC(nr * sizeof(struct nv50_reg)); + if (!pc->result) + return FALSE; + + for (i = 0; i < nr; ++i) + ctor_reg(&pc->result[i], rtype[1], i / 4, -1); + } + + if (pc->param_nr) { + int rid = 0; + + pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg)); + if (!pc->param) + return FALSE; + + for (i = 0; i < pc->param_nr; ++i) + for (c = 0; c < 4; ++c, ++rid) + ctor_reg(&pc->param[rid], P_CONST, i, rid); + } + + return TRUE; +} + +static boolean nv50_program_tx(struct nv50_program *p) { struct tgsi_parse_context parse; @@ -2120,8 +2373,10 @@ nv50_program_tx(struct nv50_program *p) pc = CALLOC_STRUCT(nv50_pc); if (!pc) return FALSE; - pc->p = p; - pc->p->cfg.high_temp = 4; + + ret = ctor_nv50_pc(pc, p); + if (ret == FALSE) + goto out_cleanup; ret = nv50_program_tx_prep(pc); if (ret == FALSE) @@ -2141,7 +2396,7 @@ nv50_program_tx(struct nv50_program *p) switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: ++pc->insn_cur; - ret = nv50_program_tx_insn(pc, tok); + ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) goto out_err; break; @@ -2152,8 +2407,8 @@ nv50_program_tx(struct nv50_program *p) if (p->type == PIPE_SHADER_FRAGMENT) { struct nv50_reg out; + ctor_reg(&out, P_TEMP, -1, -1); - out.type = P_TEMP; for (k = 0; k < pc->result_nr * 4; k++) { if (pc->result[k].rhw == -1) continue; @@ -2258,30 +2513,19 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - if (!p->data[1] && p->param_nr) { - struct nouveau_resource *heap = - nv50->screen->parm_heap[p->type]; - - if (nouveau_resource_alloc(heap, p->param_nr, p, &p->data[1])) { - while (heap->next && heap->size < p->param_nr) { - struct nv50_program *evict = heap->next->priv; - nouveau_resource_free(&evict->data[1]); - } - - if (nouveau_resource_alloc(heap, p->param_nr, p, - &p->data[1])) - assert(0); - } - } + assert(p->param_nr <= 128); if (p->param_nr) { - unsigned cbuf = NV50_CB_PVP; + unsigned cb; float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - if (p->type == PIPE_SHADER_FRAGMENT) - cbuf = NV50_CB_PFP; - nv50_program_upload_data(nv50, map, p->data[1]->start, - p->param_nr, cbuf); + + if (p->type == PIPE_SHADER_VERTEX) + cb = NV50_CB_PVP; + else + cb = NV50_CB_PFP; + + nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); } } @@ -2303,32 +2547,30 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) upload = TRUE; } - if ((p->data[0] && p->data[0]->start != p->data_start[0]) || - (p->data[1] && p->data[1]->start != p->data_start[1])) { - for (e = p->exec_head; e; e = e->next) { - unsigned ei, ci, bs; - - if (e->param.index < 0) - continue; - bs = (e->inst[1] >> 22) & 0x07; - assert(bs < 2); - ei = e->param.shift >> 5; - ci = e->param.index + p->data[bs]->start; + if (p->data[0] && p->data[0]->start != p->data_start[0]) + upload = TRUE; - e->inst[ei] &= ~e->param.mask; - e->inst[ei] |= (ci << e->param.shift); - } + if (!upload) + return; - if (p->data[0]) - p->data_start[0] = p->data[0]->start; - if (p->data[1]) - p->data_start[1] = p->data[1]->start; + for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci, bs; - upload = TRUE; + if (e->param.index < 0) + continue; + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); + ei = e->param.shift >> 5; + ci = e->param.index; + if (bs == 0) + ci += p->data[bs]->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); } - if (!upload) - return; + if (p->data[0]) + p->data_start[0] = p->data[0]->start; #ifdef NV50_PROGRAM_DUMP NOUVEAU_ERR("-------\n"); @@ -2402,8 +2644,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); - so_data (so, p->cfg.vp.attr[0]); - so_data (so, p->cfg.vp.attr[1]); + so_data (so, p->cfg.attr[0]); + so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); @@ -2421,7 +2663,6 @@ nv50_fragprog_validate(struct nv50_context *nv50) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; - unsigned i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -2438,29 +2679,186 @@ nv50_fragprog_validate(struct nv50_context *nv50) NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); - so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ - so_data (so, 0x00000004); - so_data (so, 0x00000000); - so_data (so, 0x00000000); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map); - for (i = 0; i < p->cfg.fp.high_map; i++) - so_data(so, p->cfg.fp.map[i]); - so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2); - so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ + so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); - so_data (so, p->cfg.fp.regs[2]); + so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); - so_data (so, p->cfg.fp.regs[3]); + so_data (so, p->cfg.regs[3]); so_method(so, tesla, NV50TCL_FP_START_ID, 1); so_data (so, 0); /* program start offset */ so_ref(so, &nv50->state.fragprog); so_ref(NULL, &so); } +static void +nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) +{ + struct nv50_program *fp = nv50->fragprog; + struct nv50_program *vp = nv50->vertprog; + unsigned i, c, m = base; + + /* XXX: This can't work correctly in all cases yet, we either + * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has + * to be per FP input instead of per VP output + */ + memset(pntc, 0, 8 * sizeof(uint32_t)); + + for (i = 0; i < fp->cfg.io_nr; i++) { + uint8_t sn, si; + uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; + unsigned n = popcnt4(fp->cfg.io[i].mask); + + if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + m += n; + continue; + } + + sn = vp->info.input_semantic_name[j]; + si = vp->info.input_semantic_index[j]; + + if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { + ubyte mode = + nv50->rasterizer->pipe.sprite_coord_mode[si]; + + if (mode == PIPE_SPRITE_COORD_NONE) { + m += n; + continue; + } + } + + /* this is either PointCoord or replaced by sprite coords */ + for (c = 0; c < 4; c++) { + if (!(fp->cfg.io[i].mask & (1 << c))) + continue; + pntc[m / 8] |= (c + 1) << ((m % 8) * 4); + ++m; + } + } +} + +static int +nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +{ + int c; + uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; + uint8_t *map = (uint8_t *)p_map; + + for (c = 0; c < 4; ++c) { + if (mf & 1) { + if (fpi->linear == TRUE) + lin[mid / 32] |= 1 << (mid % 32); + map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + } + + oid += mv & 1; + mf >>= 1; + mv >>= 1; + } + + return mid; +} + +void +nv50_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *fp = nv50->fragprog; + struct nouveau_stateobj *so; + struct nv50_sreg4 dummy, *vpo; + int i, n, c, m = 0; + uint32_t map[16], lin[4], reg[5], pcrd[8]; + + memset(map, 0, sizeof(map)); + memset(lin, 0, sizeof(lin)); + + reg[1] = 0x00000004; /* low and high clip distance map ids */ + reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ + reg[3] = 0x00000000; /* point size map id & enable */ + reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ + reg[4] = fp->cfg.regs[1]; /* interpolant info */ + + dummy.linear = FALSE; + dummy.mask = 0xf; /* map all components of HPOS */ + m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + + dummy.mask = 0x0; + + if (vp->cfg.clpd < 0x40) { + for (c = 0; c < vp->cfg.clpd_nr; ++c) + map[m++] = vp->cfg.clpd + c; + reg[1] = (m << 8); + } + + reg[0] |= m << 8; /* adjust BFC0 id */ + + /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ + if (nv50->rasterizer->pipe.light_twoside) { + vpo = &vp->cfg.two_side[0]; + + m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); + m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + } + + reg[0] += m - 4; /* adjust FFC0 id */ + reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ + + i = 0; + if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) + i = 1; + for (; i < fp->cfg.io_nr; i++) { + ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; + ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; + + n = fp->cfg.io[i].id_vp; + if (n >= vp->cfg.io_nr || + vp->info.output_semantic_name[n] != sn || + vp->info.output_semantic_index[n] != si) + vpo = &dummy; + else + vpo = &vp->cfg.io[n]; + + m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + } + + if (nv50->rasterizer->pipe.point_size_per_vertex) { + map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8); + reg[3] = (m++ << 4) | 1; + } + + /* now fill the stateobj */ + so = so_new(64, 0); + + n = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); + so_datap (so, reg, 4); + + so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); + so_data (so, reg[4]); + + so_method(so, tesla, 0x1540, 4); + so_datap (so, lin, 4); + + if (nv50->rasterizer->pipe.point_sprite) { + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + + so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); + so_datap (so, pcrd, 8); + } + + so_ref(so, &nv50->state.programs); + so_ref(NULL, &so); +} + void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { @@ -2476,7 +2874,6 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_ref(NULL, &p->bo); nouveau_resource_free(&p->data[0]); - nouveau_resource_free(&p->data[1]); p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 096e0476aab..d78dee083f1 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -15,6 +15,15 @@ struct nv50_program_exec { } param; }; +struct nv50_sreg4 { + uint8_t hw; + uint8_t id_vp; + uint8_t id_fp; + + uint8_t mask; + boolean linear; +}; + struct nv50_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -24,8 +33,8 @@ struct nv50_program { struct nv50_program_exec *exec_head; struct nv50_program_exec *exec_tail; unsigned exec_size; - struct nouveau_resource *data[2]; - unsigned data_start[2]; + struct nouveau_resource *data[1]; + unsigned data_start[1]; struct nouveau_bo *bo; @@ -36,14 +45,20 @@ struct nv50_program { struct { unsigned high_temp; unsigned high_result; - struct { - unsigned attr[2]; - } vp; - struct { - unsigned regs[4]; - unsigned map[5]; - unsigned high_map; - } fp; + + uint32_t attr[2]; + uint32_t regs[4]; + + /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ + unsigned io_nr; + struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + + /* FP colour inputs, VP/GP back colour outputs */ + struct nv50_sreg4 two_side[2]; + + /* VP only */ + uint8_t clpd, clpd_nr; + uint8_t psiz; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c7f80a22037..3b08e1b89fb 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -87,12 +87,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) return 1; case PIPE_CAP_GLSL: return 0; - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_OCCLUSION_QUERY: diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 4283808ed93..81fa3e34c59 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -276,6 +276,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, 0x1684, 1); so_data (so, cso->flatshade_first ? 0 : 1); + so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); + so_data (so, cso->light_twoside); + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); so_data (so, fui(cso->line_width)); so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); @@ -294,6 +297,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POINT_SIZE, 1); so_data (so, fui(cso->point_size)); + so_method(so, tesla, NV50TCL_POINT_SPRITE_ENABLE, 1); + so_data (so, cso->point_sprite); + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); if (cso->front_winding == PIPE_WINDING_CCW) { so_data(so, nvgl_polygon_mode(cso->fill_ccw)); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 344c2cf6dde..5a3559ed181 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1224, 1); @@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; } - so_data(so, bo->tile_mode << 4); + so_data(so, nv50_miptree(pt)-> + level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); so_method(so, tesla, 0x1538, 1); @@ -187,6 +189,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + so_emit(chan, nv50->state.programs); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -208,6 +212,12 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vtxattr); } nv50->state.dirty = 0; +} + +void +nv50_state_flush_notify(struct nouveau_channel *chan) +{ + struct nv50_context *nv50 = chan->user_private; so_emit_reloc_markers(chan, nv50->state.fb); so_emit_reloc_markers(chan, nv50->state.vertprog); @@ -238,6 +248,9 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + nv50_linkage_validate(nv50); + if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b266324f58d..6bf6f773b0c 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) format = nv50_format(ps->format); if (format < 0) return 1; - + if (!bo->tile_flags) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, mt->level[0].pitch); + OUT_RING (chan, mt->level[ps->level].pitch); OUT_RING (chan, ps->width); OUT_RING (chan, ps->height); OUT_RELOCh(chan, bo, ps->offset, flags); @@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, mt->level[ps->level].tile_mode << 4); OUT_RING (chan, 1); OUT_RING (chan, 0); BEGIN_RING(chan, eng2d, mthd + 0x18, 4); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index e9c3562194b..bb7731855cd 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -89,14 +89,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, if (src_bo->tile_flags) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); - OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } if (dst_bo->tile_flags) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); - OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, (dy << 16) | (dx * cpp)); } else { dst_offset += (line_count * dst_pitch); } diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index ec58eeacc21..69e4724790a 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ + r300_debug.c \ r300_emit.c \ r300_flush.c \ r300_fs.c \ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index da67bc29b89..9cc455135db 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,6 +22,9 @@ #include "r300_context.h" +#include "r300_flush.h" +#include "r300_state_invariant.h" + static boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -146,6 +149,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.winsys = (struct pipe_winsys*)r300_winsys; r300->context.screen = r300_screen(screen); + r300_init_debug(r300); + r300->context.destroy = r300_destroy_context; r300->context.clear = r300_clear; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f78492d4aa9..52b1c9a6b26 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -184,8 +184,15 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - /* Stride (pitch?) of this texture in bytes */ - unsigned stride; + /** + * If non-zero, override the natural texture layout with + * a custom stride (in bytes). + * + * \note Mipmapping fails for textures with a non-natural layout! + * + * \sa r300_texture_get_stride + */ + unsigned stride_override; /* Total size of this texture, in bytes. */ unsigned size; @@ -211,10 +218,7 @@ struct r300_vertex_format { int fs_tab[16]; }; -static struct pipe_viewport_state r300_viewport_identity = { - .scale = {1.0, 1.0, 1.0, 1.0}, - .translate = {0.0, 0.0, 0.0, 0.0}, -}; +extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { /* Parent class */ @@ -275,6 +279,9 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + + /** Combination of DBG_xxx flags */ + unsigned debug; }; /* Convenience cast wrapper. */ @@ -288,4 +295,40 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +/* Debug functionality. */ + +/** + * Debug flags to disable/enable certain groups of debugging outputs. + * + * \note These may be rather coarse, and the grouping may be impractical. + * If you find, while debugging the driver, that a different grouping + * of these flags would be beneficial, just feel free to change them + * but make sure to update the documentation in r300_debug.c to reflect + * those changes. + */ +/*@{*/ +#define DBG_HELP 0x0000001 +#define DBG_FP 0x0000002 +#define DBG_VP 0x0000004 +#define DBG_CS 0x0000008 +#define DBG_DRAW 0x0000010 +/*@}*/ + +static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) +{ + return (ctx->debug & flags) ? true : false; +} + +static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) +{ + if (DBG_ON(ctx, flags)) { + va_list va; + va_start(va, fmt); + debug_vprintf(fmt, va); + va_end(va); + } +} + +void r300_init_debug(struct r300_context * ctx); + #endif /* R300_CONTEXT_H */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 71b142c0dbf..0a7e4703636 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -49,7 +49,8 @@ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) #define CS_LOCALS(context) \ - struct r300_winsys* cs_winsys = context->winsys; \ + struct r300_context* const cs_context_copy = (context); \ + struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ int cs_count = 0; #define CHECK_CS(size) \ @@ -58,7 +59,7 @@ #define BEGIN_CS(size) do { \ CHECK_CS(size); \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ + DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ size, __FUNCTION__, __FILE__, __LINE__); \ } \ cs_winsys->begin_cs(cs_winsys, (size), \ @@ -78,7 +79,7 @@ #define OUT_CS_REG(register, value) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing 0x%08X to register 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ value, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, 0)); \ @@ -89,14 +90,14 @@ * not the actual packet0 count! */ #define OUT_CS_REG_SEQ(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1))); \ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for buffer %p, offset %d, " \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ @@ -107,7 +108,7 @@ #define END_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ if (cs_count != 0) \ @@ -117,7 +118,7 @@ #define FLUSH_CS do { \ if (VERY_VERBOSE_CS) { \ - debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ + DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ __FILE__, __LINE__); \ } \ cs_winsys->flush_cs(cs_winsys); \ @@ -127,7 +128,7 @@ #define OUT_CS_ONE_REG(register, count) do { \ if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ @@ -141,7 +142,7 @@ } while (0) #define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for index buffer %p," \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ "offset %d\n", bo, offset); \ assert(bo); \ OUT_CS(offset); \ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c new file mode 100644 index 00000000000..15308dda1de --- /dev/null +++ b/src/gallium/drivers/r300/r300_debug.c @@ -0,0 +1,88 @@ +/* + * Copyright 2009 Nicolai Haehnle <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_context.h" + +#include <ctype.h> + + +struct debug_option { + const char * name; + unsigned flag; + const char * description; +}; + +static struct debug_option debug_options[] = { + { "help", DBG_HELP, "Helpful meta-information about the driver" }, + { "fp", DBG_FP, "Fragment program handling" }, + { "vp", DBG_VP, "Vertex program handling" }, + { "cs", DBG_CS, "Command submissions" }, + { "draw", DBG_DRAW, "Draw and emit" }, + + { "all", ~0, "Convenience option that enables all debug flags" }, + + /* must be last */ + { 0, 0, 0 } +}; + +void r300_init_debug(struct r300_context * ctx) +{ + const char * options = debug_get_option("RADEON_DEBUG", 0); + boolean printhint = false; + + if (options) { + while(*options) { + if (*options == ' ' || *options == ',') { + options++; + continue; + } + + size_t length = strcspn(options, " ,"); + struct debug_option * opt; + + for(opt = debug_options; opt->name; ++opt) { + if (!strncmp(options, opt->name, length)) { + ctx->debug |= opt->flag; + break; + } + } + + if (!opt->name) { + debug_printf("Unknown debug option: %s\n", options); + printhint = true; + } + + options += length; + } + + if (!ctx->debug) + printhint = true; + } + + if (printhint || ctx->debug & DBG_HELP) { + debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" + "to a comma-separated list of debug options. Available options are:\n"); + for(struct debug_option * opt = debug_options; opt->name; ++opt) { + debug_printf(" %s: %s\n", opt->name, opt->description); + } + } +} diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index bd4d59e6f1a..a1b36ba2ed1 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "r300_emit.h" #include "r300_fs.h" +#include "r300_state_derived.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, @@ -282,7 +283,7 @@ void r300_emit_fb_state(struct r300_context* r300, for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -299,7 +300,7 @@ void r300_emit_fb_state(struct r300_context* r300, if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - pixpitch = tex->stride / tex->tex.block.size; + pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -490,7 +491,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) { CS_LOCALS(r300); - debug_printf("r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.vinfo.size); /* Set the pointer to our vertex buffer. The emitted values are this: diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 350691d592d..c4002b8e5d0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -56,6 +56,11 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); +void r300_emit_query_begin(struct r300_context* r300, + struct r300_query* query); +void r300_emit_query_end(struct r300_context* r300, + struct r300_query* query); + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 36463b9a2eb..a0e848a59ac 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -96,7 +96,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_FP); compiler.code = &fs->code; compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 1d5185b417e..2880d34877f 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -22,6 +22,8 @@ #include "r300_query.h" +#include "r300_emit.h" + static struct pipe_query* r300_create_query(struct pipe_context* pipe, unsigned query_type) { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index aced4ab8877..737396d8d97 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,15 +26,17 @@ #include "r300_cs.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_reg.h" #include "r300_state_derived.h" /* r300_render: Vertex and index buffer primitive emission. */ +#define R300_MAX_VBO_SIZE (1024 * 1024) struct r300_render { /* Parent class */ struct vbuf_render base; - + /* Pipe context */ struct r300_context* r300; @@ -45,7 +47,10 @@ struct r300_render { /* VBO */ struct pipe_buffer* vbo; - size_t vbo_alloc_size; + size_t vbo_size; + size_t vbo_offset; + size_t vbo_max_used; + void * vbo_ptr; }; static INLINE struct r300_render* @@ -74,19 +79,18 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (r300render->vbo && (size > r300render->vbo_alloc_size)) { - pipe_buffer_reference(&r300render->vbo, NULL); - } - - if (!r300render->vbo) { + if (size + r300render->vbo_offset > r300render->vbo_size) + { r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - size); + R300_MAX_VBO_SIZE); + r300render->vbo_size = R300_MAX_VBO_SIZE; } - r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size); r300render->vertex_size = vertex_size; + r300->vbo = r300render->vbo; + r300->vbo_offset = r300render->vbo_offset; return (r300render->vbo) ? TRUE : FALSE; } @@ -96,8 +100,10 @@ static void* r300_render_map_vertices(struct vbuf_render* render) struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; - return (unsigned char*)pipe_buffer_map(screen, r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); + r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + + return (r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, @@ -106,15 +112,24 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; + CS_LOCALS(r300render->r300); + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); + END_CS; + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vertex_size * (max + 1)); pipe_buffer_unmap(screen, r300render->vbo); } static void r300_render_release_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); + struct r300_context* r300 = r300render->r300; - pipe_buffer_reference(&r300render->vbo, NULL); + r300render->vbo_offset += r300render->vbo_max_used; + r300render->vbo_max_used = 0; + r300->vbo = NULL; } static boolean r300_render_set_primitive(struct vbuf_render* render, @@ -162,14 +177,12 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, return TRUE; } -static void prepare_render(struct r300_render* render, unsigned count) +static void r300_prepare_render(struct r300_render* render, unsigned count) { struct r300_context* r300 = render->r300; CS_LOCALS(r300); - r300->vbo = render->vbo; - r300_emit_dirty_state(r300); } @@ -182,9 +195,9 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); - debug_printf("r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); @@ -207,7 +220,7 @@ static void r300_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - prepare_render(r300render, count); + r300_prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, @@ -216,25 +229,7 @@ static void r300_render_draw(struct vbuf_render* render, return; } -/* - index_map = pipe_buffer_map(screen, index_buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count); - pipe_buffer_unmap(screen, index_buffer); - - debug_printf("r300: Doing indexbuf render, count %d\n", count); - - BEGIN_CS(8); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300render->hwprim); - OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); - OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); - OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; */ - - BEGIN_CS(4 + (count+1)/2); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); @@ -272,6 +267,10 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_render_release_vertices; r300render->base.destroy = r300_render_destroy; + r300render->vbo = NULL; + r300render->vbo_size = 0; + r300render->vbo_offset = 0; + return &r300render->base; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 15740f61252..3b5b1bbd37f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -93,8 +93,6 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } - case PIPE_CAP_S3TC: - return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -323,13 +321,14 @@ r300_get_tex_transfer(struct pipe_screen *screen, if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); trans->transfer.format = texture->format; + trans->transfer.x = x; + trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; trans->transfer.block = texture->block; trans->transfer.nblocksx = texture->nblocksx[level]; trans->transfer.nblocksy = texture->nblocksy[level]; - trans->transfer.stride = align(pf_get_stride(&trans->transfer.block, - texture->width[level]), 32); + trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; trans->offset = offset; } @@ -356,7 +355,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, if (transfer->usage != PIPE_TRANSFER_READ) { flags |= PIPE_BUFFER_USAGE_CPU_WRITE; } - + map = pipe_buffer_map(screen, tex->buffer, flags); if (!map) { diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 81808017f86..88cb9af6fb7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -20,10 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_pack_color.h" -#include "util/u_debug.h" +#include "tgsi/tgsi_parse.h" #include "pipe/p_config.h" #include "pipe/internal/p_winsys_screen.h" diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c01e61a9b19..5f6b225d340 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -23,6 +23,7 @@ #include "r300_state_derived.h" #include "r300_fs.h" +#include "r300_state_inlines.h" #include "r300_vs.h" /* r300_state_derived: Various bits of state which are dependent upon @@ -195,13 +196,13 @@ static void r300_vertex_psc(struct r300_context* r300, * and not on attrib information. */ if (r300screen->caps->has_tcl) { attrib_count = r300->vs->info.num_inputs; - debug_printf("r300: routing %d attribs in psc for vs\n", + DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", attrib_count); } else { attrib_count = vinfo->num_attribs; - debug_printf("r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - debug_printf("r300: attrib: offset %d, interp %d, size %d," + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," " tab %d\n", vinfo->attrib[i].src_index, vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, tab[i]); @@ -299,18 +300,18 @@ static void r300_update_fs_tab(struct r300_context* r300) } /* Now that we know where everything is... */ - debug_printf("r300: fp input count: %d\n", info->num_inputs); + DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); for (i = 0; i < info->num_inputs; i++) { switch (tab[i]) { case INTERP_LINEAR: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, color, tab %d\n", i, cols_emitted); tab[i] = cols_emitted; cols_emitted++; break; case INTERP_PERSPECTIVE: - debug_printf("r300: attrib: " + DBG(r300, DBG_DRAW, "r300: attrib: " "stack offset %d, texcoord, tab %d\n", i, cols + texs); tab[i] = cols + texs; diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h index 63ae8eb8d08..71a4a47b003 100644 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -23,11 +23,7 @@ #ifndef R300_STATE_DERIVED_H #define R300_STATE_DERIVED_H -#include "draw/draw_vertex.h" - -#include "r300_context.h" -#include "r300_reg.h" -#include "r300_state_inlines.h" +struct r300_context; void r300_update_derived_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 7d822fec483..3865730d635 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -23,6 +23,12 @@ #include "r300_state_invariant.h" + +struct pipe_viewport_state r300_viewport_identity = { + .scale = {1.0, 1.0, 1.0, 1.0}, + .translate = {0.0, 0.0, 0.0, 0.0}, +}; + /* Calculate and emit invariant state. This is data that the 3D engine * will probably want at the beginning of every CS, but it's not currently * handled by any CSO setup, and in addition it doesn't really change much. diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 96e6e4a77d4..cc6288cb519 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -29,7 +29,7 @@ static void r300_surface_setup(struct r300_context* r300, unsigned w, unsigned h) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; - unsigned pixpitch = dest->stride / dest->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size; CS_LOCALS(r300); r300_emit_blend_state(r300, &blend_clear_state); @@ -100,7 +100,7 @@ static void r300_surface_fill(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = tex->stride / tex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; boolean invalid = FALSE; CS_LOCALS(r300); @@ -233,7 +233,7 @@ static void r300_surface_copy(struct pipe_context* pipe, struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* srctex = (struct r300_texture*)src->texture; struct r300_texture* desttex = (struct r300_texture*)dest->texture; - unsigned pixpitch = srctex->stride / srctex->tex.block.size; + unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size; boolean invalid = FALSE; float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 590052509cc..6e8c3683200 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -25,7 +25,6 @@ static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, - unsigned pitch, unsigned levels) { struct r300_texture_state* state = &tex->state; @@ -38,7 +37,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, /* XXX */ state->format1 = r300_translate_texformat(tex->tex.format); - state->format2 = pitch - 1; + state->format2 = r300_texture_get_stride(tex, 0); /* Assume (somewhat foolishly) that oversized textures will * not be permitted by the state tracker. */ @@ -50,13 +49,30 @@ static void r300_setup_texture_state(struct r300_texture* tex, } debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", - width, height, pitch, levels); + width, height, levels); +} + +/** + * Return the stride, in bytes, of the texture images of the given texture + * at the given level. + */ +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) +{ + if (tex->stride_override) + return tex->stride_override; + + if (level > tex->tex.last_level) { + debug_printf("%s: level (%u) > last_level (%u)\n", level, tex->tex.last_level); + return 0; + } + + return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, offset; + int stride, size; int i; for (i = 0; i <= base->last_level; i++) { @@ -74,7 +90,7 @@ static void r300_setup_miptree(struct r300_texture* tex) * XXX * POT, uncompressed, unmippmapped textures can be aligned to 32, * instead of 64. */ - stride = align(pf_get_stride(&base->block, base->width[i]), 32); + stride = r300_texture_get_stride(tex, i); size = stride * base->nblocksy[i] * base->depth[i]; tex->offset[i] = align(tex->size, 32); @@ -84,10 +100,6 @@ static void r300_setup_miptree(struct r300_texture* tex) "(%dx%dx%d px, pitch %d bytes)\n", i, base->width[i], base->height[i], base->depth[i], stride); - /* Save stride of first level to the texture. */ - if (i == 0) { - tex->stride = stride; - } } } @@ -109,7 +121,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, template->width[0], template->height[0], - template->width[0], template->last_level); + template->last_level); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -189,11 +201,10 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - tex->stride = *stride; + tex->stride_override = *stride; /* XXX */ - r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], - tex->stride, 0); + r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -221,7 +232,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture, pipe_buffer_reference(buffer, tex->buffer); if (stride) { - *stride = tex->stride; + *stride = r300_texture_get_stride(tex, 0); } return TRUE; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 3b56f0307c0..3109af5baca 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -30,8 +30,12 @@ #include "r300_context.h" #include "r300_reg.h" +struct r300_texture; + void r300_init_screen_texture_functions(struct pipe_screen* screen); +unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 2c63a46c277..7f120088308 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -161,7 +161,6 @@ static unsigned translate_saturate(unsigned saturate) /* fall-through */ case TGSI_SAT_NONE: return RC_SATURATE_NONE; case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE; - case TGSI_SAT_MINUS_PLUS_ONE: return RC_SATURATE_MINUS_PLUS_ONE; } } diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 2cb903bba2f..12a6e37be62 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -116,7 +116,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Setup the compiler */ rc_init(&compiler.Base); - compiler.Base.Debug = 1; + compiler.Base.Debug = DBG_ON(r300, DBG_VP); compiler.code = &vs->code; compiler.UserData = vs; diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index fa59277438c..8fac8e6e05f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,8 +36,6 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" -#include "sp_surface.h" -#include "sp_state.h" #include "sp_tile_cache.h" @@ -85,5 +83,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, /* non-cached surface */ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); #endif - } + } + + softpipe->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h index 2e450672f58..9be3b86fe9f 100644 --- a/src/gallium/drivers/softpipe/sp_clear.h +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -32,7 +32,6 @@ #ifndef SP_CLEAR_H #define SP_CLEAR_H -#include "pipe/p_state.h" struct pipe_context; extern void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 86df320ea8a..c699c433e9c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -83,7 +83,8 @@ softpipe_unmap_transfers(struct softpipe_context *sp) } -static void softpipe_destroy( struct pipe_context *pipe ) +static void +softpipe_destroy( struct pipe_context *pipe ) { struct softpipe_context *softpipe = softpipe_context( pipe ); uint i; @@ -121,6 +122,15 @@ static void softpipe_destroy( struct pipe_context *pipe ) FREE( softpipe ); } + +/** + * if (the texture is being used as a framebuffer surface) + * return PIPE_REFERENCED_FOR_WRITE + * else if (the texture is a bound texture source) + * return PIPE_REFERENCED_FOR_READ XXX not done yet + * else + * return PIPE_UNREFERENCED + */ static unsigned int softpipe_is_texture_referenced( struct pipe_context *pipe, struct pipe_texture *texture, @@ -129,15 +139,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; - if(softpipe->dirty_render_cache) { + if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { - if(softpipe->framebuffer.cbufs[i] && - softpipe->framebuffer.cbufs[i]->texture == texture) + if (softpipe->framebuffer.cbufs[i] && + softpipe->framebuffer.cbufs[i]->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } - if(softpipe->framebuffer.zsbuf && - softpipe->framebuffer.zsbuf->texture == texture) + if (softpipe->framebuffer.zsbuf && + softpipe->framebuffer.zsbuf->texture == texture) { return PIPE_REFERENCED_FOR_WRITE; + } } /* FIXME: we also need to do the same for the texture cache */ @@ -145,6 +157,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + static unsigned int softpipe_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) @@ -152,6 +165,7 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, return PIPE_UNREFERENCED; } + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -222,7 +236,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced; softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); /* * Alloc caches for accessing drawing surfaces and textures. diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 6178c4ac7e3..ce770184158 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -65,8 +65,6 @@ softpipe_get_param(struct pipe_screen *screen, int param) return 1; case PIPE_CAP_GLSL: return 1; - case PIPE_CAP_S3TC: - return 0; case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: @@ -141,6 +139,7 @@ softpipe_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_Z32_FLOAT: return FALSE; default: return TRUE; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 75551000c9b..9258f641091 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -91,6 +91,23 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) vinfo->num_attribs = 0; for (i = 0; i < spfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (spfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(softpipe->draw, @@ -106,7 +123,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -114,7 +131,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a1d3bade27a..f99a30277dd 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -464,7 +464,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) unsigned face; float sc, tc, ma; - if (arx > ary && arx > arz) { + if (arx >= ary && arx >= arz) { if (rx >= 0.0F) { face = PIPE_TEX_FACE_POS_X; sc = -rz; @@ -478,7 +478,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) ma = arx; } } - else if (ary > arx && ary > arz) { + else if (ary >= arx && ary >= arz) { if (ry >= 0.0F) { face = PIPE_TEX_FACE_POS_Y; sc = rx; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 70f09324311..548e40c4a8c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -30,26 +30,21 @@ * Michel Dänzer <[email protected]> */ -#include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" -#include "sp_tile_cache.h" #include "sp_screen.h" #include "sp_winsys.h" -/* Simple, maximally packed layout. - */ - - -/* Conventional allocation path for non-display textures: +/** + * Conventional allocation path for non-display textures: + * Use a simple, maximally packed layout. */ static boolean softpipe_texture_layout(struct pipe_screen *screen, @@ -89,6 +84,10 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } + +/** + * Texture layout for simple color buffers. + */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) @@ -112,9 +111,6 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, } - - - static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -342,14 +338,13 @@ softpipe_transfer_map( struct pipe_screen *screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) - { + if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ softpipe_screen(screen)->timestamp++; } - + xfer_map = map + softpipe_transfer(transfer)->offset + transfer->y / transfer->block.height * transfer->stride + transfer->x / transfer->block.width * transfer->block.size; @@ -360,7 +355,7 @@ softpipe_transfer_map( struct pipe_screen *screen, static void softpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) + struct pipe_transfer *transfer) { struct softpipe_texture *spt; @@ -377,12 +372,6 @@ softpipe_transfer_unmap(struct pipe_screen *screen, void -softpipe_init_texture_funcs(struct softpipe_context *sp) -{ -} - - -void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; @@ -404,7 +393,7 @@ softpipe_get_texture_buffer( struct pipe_texture *texture, struct pipe_buffer **buf, unsigned *stride ) { - struct softpipe_texture *tex = (struct softpipe_texture *)texture; + struct softpipe_texture *tex = (struct softpipe_texture *) texture; if (!tex) return FALSE; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 893aa7d11d8..b1b6130b22c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -74,9 +74,6 @@ softpipe_transfer(struct pipe_transfer *pt) extern void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ); - -extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); |