diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_sample.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 302 |
1 files changed, 207 insertions, 95 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 7a64392d3c1..844d1d935b5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -47,8 +47,7 @@ /* - * Bri-linear factor. Use zero or any other number less than one to force - * tri-linear filtering. + * Bri-linear factor. Should be greater than one. */ #define BRILINEAR_FACTOR 2 @@ -201,8 +200,8 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef float_size; LLVMValueRef rho; - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); + dsdx = ddx[0]; + dsdy = ddy[0]; if (dims <= 1) { rho_x = dsdx; @@ -215,15 +214,15 @@ lp_build_rho(struct lp_build_sample_context *bld, rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, ""); rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, ""); - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); + dtdx = ddx[1]; + dtdy = ddy[1]; rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, ""); rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, ""); if (dims >= 3) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); + drdx = ddx[2]; + drdy = ddy[2]; rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, ""); rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, ""); @@ -294,31 +293,30 @@ lp_build_rho(struct lp_build_sample_context *bld, * TODO: This could be done in fixed point, where applicable. 
*/ static void -lp_build_brilinear_lod(struct lp_build_sample_context *bld, +lp_build_brilinear_lod(struct lp_build_context *bld, LLVMValueRef lod, double factor, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart) { - struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef lod_fpart; - float pre_offset = (factor - 0.5)/factor - 0.5; - float post_offset = 1 - factor; + double pre_offset = (factor - 0.5)/factor - 0.5; + double post_offset = 1 - factor; if (0) { lp_build_printf(bld->builder, "lod = %f\n", lod); } - lod = lp_build_add(float_bld, lod, - lp_build_const_vec(float_bld->type, pre_offset)); + lod = lp_build_add(bld, lod, + lp_build_const_vec(bld->type, pre_offset)); - lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, &lod_fpart); + lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); - lod_fpart = lp_build_mul(float_bld, lod_fpart, - lp_build_const_vec(float_bld->type, factor)); + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); - lod_fpart = lp_build_add(float_bld, lod_fpart, - lp_build_const_vec(float_bld->type, post_offset)); + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); /* * It's not necessary to clamp lod_fpart since: @@ -335,6 +333,61 @@ lp_build_brilinear_lod(struct lp_build_sample_context *bld, } +/* + * Combined log2 and brilinear lod computation. + * + * It's in all respects identical to calling lp_build_fast_log2() and + * lp_build_brilinear_lod() above, but by combining we can compute the integer + * and fractional part independently. 
+ */ +static void +lp_build_brilinear_rho(struct lp_build_context *bld, + LLVMValueRef rho, + double factor, + LLVMValueRef *out_lod_ipart, + LLVMValueRef *out_lod_fpart) +{ + LLVMValueRef lod_ipart; + LLVMValueRef lod_fpart; + + const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); + const double post_offset = 1 - 2*factor; + + assert(bld->type.floating); + + assert(lp_check_value(bld->type, rho)); + + /* + * The pre factor will make the intersections with the exact powers of two + * happen precisely where we want them to be, which means that the integer + * part will not need any post adjustments. + */ + rho = lp_build_mul(bld, rho, + lp_build_const_vec(bld->type, pre_factor)); + + /* ipart = ifloor(log2(rho)) */ + lod_ipart = lp_build_extract_exponent(bld, rho, 0); + + /* fpart = rho / 2**ipart */ + lod_fpart = lp_build_extract_mantissa(bld, rho); + + lod_fpart = lp_build_mul(bld, lod_fpart, + lp_build_const_vec(bld->type, factor)); + + lod_fpart = lp_build_add(bld, lod_fpart, + lp_build_const_vec(bld->type, post_offset)); + + /* + * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: + * - the above expression will never produce numbers greater than one. + * - the mip filtering branch is only taken if lod_fpart is positive + */ + + *out_lod_ipart = lod_ipart; + *out_lod_fpart = lod_fpart; +} + + /** * Generate code to compute texture level of detail (lambda). 
* \param ddx partial derivatives of (s, t, r, q) with respect to X @@ -389,16 +442,32 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, rho = lp_build_rho(bld, ddx, ddy); - /* compute lod = log2(rho) */ - if ((mip_filter == PIPE_TEX_MIPFILTER_NONE || - mip_filter == PIPE_TEX_MIPFILTER_NEAREST) && - !lod_bias && + /* + * Compute lod = log2(rho) + */ + + if (!lod_bias && !bld->static_state->lod_bias_non_zero && !bld->static_state->apply_max_lod && !bld->static_state->apply_min_lod) { - *out_lod_ipart = lp_build_ilog2(float_bld, rho); - *out_lod_fpart = bld->float_bld.zero; - return; + /* + * Special case when there are no post-log2 adjustments, which + * saves instructions by keeping the integer and fractional lod + * computations separate from the start. + */ + + if (mip_filter == PIPE_TEX_MIPFILTER_NONE || + mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + *out_lod_ipart = lp_build_ilog2(float_bld, rho); + *out_lod_fpart = bld->float_bld.zero; + return; + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && + !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, + out_lod_ipart, out_lod_fpart); + return; + } } if (0) { @@ -437,21 +506,22 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (BRILINEAR_FACTOR > 1.0) { - lp_build_brilinear_lod(bld, lod, BRILINEAR_FACTOR, + if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { + lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); } else { lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); } - lp_build_name(*out_lod_ipart, "lod_ipart"); lp_build_name(*out_lod_fpart, "lod_fpart"); } else { *out_lod_ipart = lp_build_iround(float_bld, lod); } + lp_build_name(*out_lod_ipart, "lod_ipart"); + return; } @@ -630,37 +700,21 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, void lp_build_mipmap_level_sizes(struct 
lp_build_sample_context *bld, LLVMValueRef ilevel, - LLVMValueRef *out_width_vec, - LLVMValueRef *out_height_vec, - LLVMValueRef *out_depth_vec, + LLVMValueRef *out_size, LLVMValueRef *row_stride_vec, LLVMValueRef *img_stride_vec) { const unsigned dims = bld->dims; LLVMValueRef ilevel_vec; - LLVMValueRef size_vec; - LLVMTypeRef i32t = LLVMInt32Type(); ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); /* * Compute width, height, depth at mipmap level 'ilevel' */ - size_vec = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); - *out_width_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 0, 0)); if (dims >= 2) { - - *out_height_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 1, 0)); - *row_stride_vec = lp_build_get_level_stride_vec(bld, bld->row_stride_array, ilevel); @@ -668,18 +722,90 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, *img_stride_vec = lp_build_get_level_stride_vec(bld, bld->img_stride_array, ilevel); - if (dims == 3) { - *out_depth_vec = lp_build_extract_broadcast(bld->builder, - bld->int_size_type, - bld->int_coord_type, - size_vec, - LLVMConstInt(i32t, 2, 0)); - } } } } +/** + * Extract and broadcast texture size. 
+ * + * @param size_type type of the texture size vector (either + * bld->int_size_type or bld->float_size_type) + * @param coord_type type of the coordinate vector (either + * bld->int_coord_type or bld->coord_type) + * @param size vector with the texture size (width, height, + * depth) + */ +void +lp_build_extract_image_sizes(struct lp_build_sample_context *bld, + struct lp_type size_type, + struct lp_type coord_type, + LLVMValueRef size, + LLVMValueRef *out_width, + LLVMValueRef *out_height, + LLVMValueRef *out_depth) +{ + const unsigned dims = bld->dims; + LLVMTypeRef i32t = LLVMInt32Type(); + + *out_width = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 0, 0)); + if (dims >= 2) { + *out_height = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 1, 0)); + if (dims == 3) { + *out_depth = lp_build_extract_broadcast(bld->builder, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 2, 0)); + } + } +} + + +/** + * Unnormalize coords. 
+ * + * @param flt_size vector with the floating point texture size (width, height, depth) + */ +void +lp_build_unnormalized_coords(struct lp_build_sample_context *bld, + LLVMValueRef flt_size, + LLVMValueRef *s, + LLVMValueRef *t, + LLVMValueRef *r) +{ + const unsigned dims = bld->dims; + LLVMValueRef width; + LLVMValueRef height; + LLVMValueRef depth; + + lp_build_extract_image_sizes(bld, + bld->float_size_type, + bld->coord_type, + flt_size, + &width, + &height, + &depth); + + /* s = s * width, t = t * height */ + *s = lp_build_mul(&bld->coord_bld, *s, width); + if (dims >= 2) { + *t = lp_build_mul(&bld->coord_bld, *t, height); + if (dims >= 3) { + *r = lp_build_mul(&bld->coord_bld, *r, depth); + } + } +} + /** Helper used by lp_build_cube_lookup() */ static LLVMValueRef @@ -798,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); { - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; + LLVMValueRef face_s_var; + LLVMValueRef face_t_var; + LLVMValueRef face_var; - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - *face_s = bld->coord_bld.undef; - *face_t = bld->coord_bld.undef; - *face = bld->int_bld.undef; - - lp_build_name(*face_s, "face_s"); - lp_build_name(*face_t, "face_t"); - lp_build_name(*face, "face"); + face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var"); + face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var"); + face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var"); - lp_build_flow_scope_declare(flow_ctx, face_s); - lp_build_flow_scope_declare(flow_ctx, face_t); - lp_build_flow_scope_declare(flow_ctx, face); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz); { /* +/- X face */ LLVMValueRef sign = lp_build_sgn(float_bld, rx); @@ -826,57 +943,52 @@ 
lp_build_cube_lookup(struct lp_build_sample_context *bld, *face = lp_build_cube_face(bld, rx, PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx); { - struct lp_build_flow_context *flow_ctx2; struct lp_build_if_state if_ctx2; - LLVMValueRef face_s2 = bld->coord_bld.undef; - LLVMValueRef face_t2 = bld->coord_bld.undef; - LLVMValueRef face2 = bld->int_bld.undef; - - flow_ctx2 = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx2); - lp_build_flow_scope_declare(flow_ctx2, &face_s2); - lp_build_flow_scope_declare(flow_ctx2, &face_t2); - lp_build_flow_scope_declare(flow_ctx2, &face2); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz); { /* +/- Y face */ LLVMValueRef sign = lp_build_sgn(float_bld, ry); LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); - face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); - face2 = lp_build_cube_face(bld, ry, + *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + *face = lp_build_cube_face(bld, ry, PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_else(&if_ctx2); { /* +/- Z face */ LLVMValueRef sign = lp_build_sgn(float_bld, rz); LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); - face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - face2 = lp_build_cube_face(bld, rz, + *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + *face_t 
= lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rz, PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z); + LLVMBuildStore(bld->builder, *face_s, face_s_var); + LLVMBuildStore(bld->builder, *face_t, face_t_var); + LLVMBuildStore(bld->builder, *face, face_var); } lp_build_endif(&if_ctx2); - lp_build_flow_scope_end(flow_ctx2); - lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; - *face_t = face_t2; - *face = face2; } lp_build_endif(&if_ctx); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); + + *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s"); + *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t"); + *face = LLVMBuildLoad(bld->builder, face_var, "face"); } } |