diff options
author | Roland Scheidegger <[email protected]> | 2010-05-04 15:58:29 +0200 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2010-05-04 15:58:29 +0200 |
commit | 0ae2f59c0287f4baec6c7de5f2f0fdf736fba26d (patch) | |
tree | ee14bf3e8bba80649541c4e13fc07c60baf6c248 /src/gallium/auxiliary/gallivm | |
parent | 7662e3519bef3802024da3050b886068281e02b1 (diff) | |
parent | 1c920c61764b17fd9fb4a89d2db7355fbe1d7565 (diff) |
Merge commit 'origin/master' into gallium-msaa
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_limits.h | 53 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 207 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 142 |
6 files changed, 301 insertions, 140 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 20ae958714b..f372a48846f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - /* FIXME: improve precision */ + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + /* + * XXX: Added precision is not always necessary, so only enable this + * when we have a better system in place to track minimum precision. + */ + +#if 0 + /* + * Do one Newton-Raphson step to improve precision: + * + * x1 = (2 - a * rcp(a)) * rcp(a) + */ + + LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMValueRef rcp_a; + LLVMValueRef res; + + rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + + res = LLVMBuildMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildSub(bld->builder, two, res, ""); + res = LLVMBuildMul(bld->builder, res, rcp_a, ""); + + return rcp_a; +#else return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); +#endif + } return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h new file mode 100644 index 00000000000..e095a0abe3a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef LP_BLD_LIMITS_H_ +#define LP_BLD_LIMITS_H_ + +/* + * TGSI translation limits. + * + * Some are slightly above SM 3.0 requirements to give some wiggle room to + * the state trackers. + */ + +#define LP_MAX_TGSI_TEMPS 256 + +#define LP_MAX_TGSI_ADDRS 16 + +#define LP_MAX_TGSI_IMMEDIATES 256 + +/** + * Maximum control flow nesting + * + * SM3.0 requires 24 + */ +#define LP_MAX_TGSI_NESTING 32 + + +#endif /* LP_BLD_LIMITS_H_ */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 8ceb20473d5..955b1d21ee5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -173,7 +173,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, + LLVMValueRef explicit_lod, LLVMValueRef *texel); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 74dc9e1d812..3f0ea05b795 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -30,6 +30,7 @@ * Texture sampling -- SoA. * * @author Jose Fonseca <[email protected]> + * @author Brian Paul <[email protected]> */ #include "pipe/p_defines.h" @@ -325,6 +326,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->format_desc, x, y, z, y_stride, z_stride); + if (use_border) { + /* If we can sample the border color, it means that texcoords may + * lie outside the bounds of the texture image. We need to do + * something to prevent reading out of bounds and causing a segfault. + * + * Simply AND the texture coords with !use_border. This will cause + * coords which are out of bounds to become zero. Zero's guaranteed + * to be inside the texture image. + */ + offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border); + } + lp_build_fetch_rgba_soa(bld->builder, bld->format_desc, bld->texel_type, @@ -891,17 +904,24 @@ lp_build_minify(struct lp_build_sample_context *bld, * \param s vector of texcoord s values * \param t vector of texcoord t values * \param r vector of texcoord r values - * \param shader_lod_bias vector float with the shader lod bias, + * \param lod_bias optional float vector with the shader lod bias + * \param explicit_lod optional float vector with the explicit lod * \param width scalar int texture width * \param height scalar int texture height * \param depth scalar int texture depth + * + * XXX: The resulting lod is scalar, so ignore all but the first element of + * derivatives, lod_bias, etc that are passed by the shader. */ static LLVMValueRef lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, - LLVMValueRef shader_lod_bias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth) @@ -914,7 +934,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); } else { - const int dims = texture_dims(bld->static_state->target); struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); @@ -922,83 +941,76 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, bld->static_state->min_lod); LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod); - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - - LLVMValueRef s0, s1, s2; - LLVMValueRef t0, t1, t2; - LLVMValueRef r0, r1, r2; - LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; - LLVMValueRef rho, lod; - - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - * XXX we're assuming a four-element quad in 2x2 layout here. - */ - s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); - s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); - s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); - dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); - t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); - t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); - dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); - r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); - r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); - drdx = LLVMBuildSub(bld->builder, r1, r0, ""); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildSub(bld->builder, r2, r0, ""); - drdy = lp_build_abs(float_bld, drdy); - } + LLVMValueRef lod; + + if (explicit_lod) { + lod = LLVMBuildExtractElement(bld->builder, explicit_lod, + index0, ""); } + else { + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + */ + dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); + drdy = lp_build_abs(float_bld, drdy); + } + } - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } } - } - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); - /* add sampler lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias"); + /* add shader lod bias */ + if (lod_bias) { + lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, + index0, ""); + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + } + } - /* add shader lod bias */ - /* XXX for now we take only the first element since our lod is scalar */ - shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias, - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); - lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias"); + /* add sampler lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -1584,7 +1596,10 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth, @@ -1622,7 +1637,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth); + lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth); } /* @@ -2049,6 +2066,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, /** + * Just set texels to white instead of actually sampling the texture. + * For debugging. + */ +static void +lp_build_sample_nop(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + unsigned chan; + + for (chan = 0; chan < 4; chan++) { + /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/ + texel[chan] = texel_bld->one; + } +} + + +/** * Build texture sampling code. * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. @@ -2062,7 +2097,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *texel) { struct lp_build_sample_context bld; @@ -2113,19 +2151,24 @@ lp_build_sample_soa(LLVMBuilderRef builder, height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); - if (util_format_is_rgba8_variant(bld.format_desc) && - static_state->target == PIPE_TEXTURE_2D && - static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) { + if (0) { + /* For debug: no-op texture sampling */ + lp_build_sample_nop(&bld, texel); + } + else if (util_format_is_rgba8_variant(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { /* special case */ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, row_stride_array, data_array, texel); } else { - lp_build_sample_general(&bld, unit, s, t, r, lodbias, + lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, + lod_bias, explicit_lod, width, height, depth, width_vec, height_vec, depth_vec, row_stride_array, img_stride_array, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 2eac5da6c69..5ce1385c480 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -65,7 +65,10 @@ struct lp_build_sampler_soa unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *texel); }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index d3c769e28b8..0b1a28a7ab7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -54,13 +54,10 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" +#include "lp_bld_limits.h" #include "lp_bld_debug.h" -#define LP_MAX_TEMPS 256 -#define LP_MAX_IMMEDIATES 256 - - #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -84,7 +81,6 @@ #define QUAD_BOTTOM_LEFT 2 #define QUAD_BOTTOM_RIGHT 3 -#define LP_TGSI_MAX_NESTING 16 struct lp_exec_mask { struct lp_build_context *bld; @@ -93,19 +89,19 @@ struct lp_exec_mask { LLVMTypeRef int_vec_type; - LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; int cond_stack_size; LLVMValueRef cond_mask; - LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef break_stack[LP_MAX_TGSI_NESTING]; int break_stack_size; LLVMValueRef break_mask; - LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING]; int cont_stack_size; LLVMValueRef cont_mask; - LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING]; int loop_stack_size; LLVMBasicBlockRef loop_block; @@ -124,9 +120,9 @@ struct lp_build_tgsi_soa_context struct lp_build_sampler_soa *sampler; - LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; - LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; - LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS]; + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; /* we allocate an array of temps if we have indirect * addressing and then the temps above is unused */ @@ -198,6 +194,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, LLVMValueRef val) { + assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, mask->int_vec_type, ""); @@ -239,6 +236,10 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask) if (mask->cond_stack_size == 0) mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); + assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING); + assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); + mask->break_stack[mask->break_stack_size++] = mask->break_mask; mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; @@ -255,16 +256,9 @@ static void lp_exec_break(struct lp_exec_mask *mask) mask->exec_mask, "break"); - /* mask->break_stack_size > 1 implies that we encountered a break - * statemant already and if that's the case we want to make sure - * our mask is a combination of the previous break and the current - * execution mask */ - if (mask->break_stack_size > 1) { - mask->break_mask = LLVMBuildAnd(mask->bld->builder, - mask->break_mask, - exec_mask, "break_full"); - } else - mask->break_mask = exec_mask; + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); lp_exec_mask_update(mask); } @@ -275,12 +269,9 @@ static void lp_exec_continue(struct lp_exec_mask *mask) mask->exec_mask, ""); - if (mask->cont_stack_size > 1) { - mask->cont_mask = LLVMBuildAnd(mask->bld->builder, - mask->cont_mask, - exec_mask, ""); - } else - mask->cont_mask = exec_mask; + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); lp_exec_mask_update(mask); } @@ -589,7 +580,6 @@ emit_store( case TGSI_FILE_PREDICATE: /* FIXME */ - assert(0); break; default: @@ -602,21 +592,37 @@ emit_store( * High-level instruction translators. */ +enum tex_modifier { + TEX_MODIFIER_NONE = 0, + TEX_MODIFIER_PROJECTED, + TEX_MODIFIER_LOD_BIAS, + TEX_MODIFIER_EXPLICIT_LOD, + TEX_MODIFIER_EXPLICIT_DERIV +}; static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - boolean apply_lodbias, - boolean projected, + enum tex_modifier modifier, LLVMValueRef *texel) { - const uint unit = inst->Src[1].Register.Index; - LLVMValueRef lodbias; + unsigned unit; + LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; LLVMValueRef coords[3]; + LLVMValueRef ddx[3]; + LLVMValueRef ddy[3]; unsigned num_coords; unsigned i; + if (!bld->sampler) { + _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); + for (i = 0; i < 4; i++) { + texel[i] = bld->base.undef; + } + return; + } + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; @@ -637,29 +643,57 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, return; } - if(apply_lodbias) - lodbias = emit_fetch( bld, inst, 0, 3 ); - else - lodbias = bld->base.zero; + if (modifier == TEX_MODIFIER_LOD_BIAS) { + lod_bias = emit_fetch( bld, inst, 0, 3 ); + explicit_lod = NULL; + } + else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + lod_bias = NULL; + explicit_lod = emit_fetch( bld, inst, 0, 3 ); + } + else { + lod_bias = NULL; + explicit_lod = NULL; + } - if (projected) { + if (modifier == TEX_MODIFIER_PROJECTED) { oow = emit_fetch( bld, inst, 0, 3 ); oow = lp_build_rcp(&bld->base, oow); } for (i = 0; i < num_coords; i++) { coords[i] = emit_fetch( bld, inst, 0, i ); - if (projected) + if (modifier == TEX_MODIFIER_PROJECTED) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } for (i = num_coords; i < 3; i++) { coords[i] = bld->base.undef; } + if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) { + for (i = 0; i < num_coords; i++) { + ddx[i] = emit_fetch( bld, inst, 1, i ); + ddy[i] = emit_fetch( bld, inst, 2, i ); + } + unit = inst->Src[3].Register.Index; + } else { + for (i = 0; i < num_coords; i++) { + ddx[i] = emit_ddx( bld, coords[i] ); + ddy[i] = emit_ddy( bld, coords[i] ); + } + unit = inst->Src[1].Register.Index; + } + for (i = num_coords; i < 3; i++) { + ddx[i] = bld->base.undef; + ddy[i] = bld->base.undef; + } + bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, bld->base.type, - unit, num_coords, coords, lodbias, + unit, num_coords, coords, + ddx, ddy, + lod_bias, explicit_lod, texel); } @@ -739,7 +773,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, lp_build_mask_update(bld->mask, mask); } -static int +static void emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) @@ -753,6 +787,7 @@ emit_declaration( for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: + assert(idx < LP_MAX_TGSI_TEMPS); if (bld->has_indirect_addressing) { LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), last*4 + 4, 0); @@ -772,18 +807,21 @@ emit_declaration( break; case TGSI_FILE_ADDRESS: + assert(idx < LP_MAX_TGSI_ADDRS); for (i = 0; i < NUM_CHANNELS; i++) bld->addr[idx][i] = lp_build_alloca(bld->base.builder, vec_type, ""); break; + case TGSI_FILE_PREDICATE: + _debug_printf("warning: predicate registers not yet implemented\n"); + break; + default: /* don't need to declare other vars */ break; } } - - return TRUE; } @@ -1359,12 +1397,11 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 ); break; case TGSI_OPCODE_TXD: - /* FIXME */ - return FALSE; + emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); break; case TGSI_OPCODE_UP2H: @@ -1466,7 +1503,7 @@ emit_instruction( break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1571,11 +1608,11 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 ); break; case TGSI_OPCODE_BRK: @@ -1765,10 +1802,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: /* Inputs already interpolated */ - { - if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration )) - _debug_printf("warning: failed to define LLVM variable\n"); - } + emit_declaration( &bld, &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: @@ -1787,7 +1821,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, { const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert(size <= 4); - assert(num_immediates < LP_MAX_IMMEDIATES); + assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); |