diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 60 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 28 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 321 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 15 |
4 files changed, 184 insertions, 240 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 8f8410c015b..3291ec40af5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -974,7 +974,7 @@ lp_build_lerp_simple(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1, - bool normalized) + unsigned flags) { unsigned half_width = bld->type.width/2; LLVMBuilderRef builder = bld->gallivm->builder; @@ -987,14 +987,17 @@ lp_build_lerp_simple(struct lp_build_context *bld, delta = lp_build_sub(bld, v1, v0); - if (normalized) { + if (flags & LP_BLD_LERP_WIDE_NORMALIZED) { if (!bld->type.sign) { - /* - * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the - * most-significant-bit to the lowest-significant-bit, so that - * later we can just divide by 2**n instead of 2**n - 1. - */ - x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1)); + if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) { + /* + * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the + * most-significant-bit to the lowest-significant-bit, so that + * later we can just divide by 2**n instead of 2**n - 1. + */ + + x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1)); + } /* (x * delta) >> n */ res = lp_build_mul(bld, x, delta); @@ -1005,15 +1008,18 @@ lp_build_lerp_simple(struct lp_build_context *bld, * use the 2**n - 1 divison approximation in lp_build_mul_norm * instead. */ + assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)); res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta); } } else { + assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)); res = lp_build_mul(bld, x, delta); } res = lp_build_add(bld, v0, res); - if ((normalized && !bld->type.sign) || bld->type.fixed) { + if (((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) || + bld->type.fixed) { /* We need to mask out the high order bits when lerping 8bit normalized colors stored on 16bits */ /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, * but it will be wrong for true fixed point use cases. Basically we need @@ -1033,7 +1039,8 @@ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, - LLVMValueRef v1) + LLVMValueRef v1, + unsigned flags) { const struct lp_type type = bld->type; LLVMValueRef res; @@ -1042,6 +1049,8 @@ lp_build_lerp(struct lp_build_context *bld, assert(lp_check_value(type, v0)); assert(lp_check_value(type, v1)); + assert(!(flags & LP_BLD_LERP_WIDE_NORMALIZED)); + if (type.norm) { struct lp_type wide_type; struct lp_build_context wide_bld; @@ -1068,18 +1077,25 @@ lp_build_lerp(struct lp_build_context *bld, * Lerp both halves. */ - resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, TRUE); - resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, TRUE); + flags |= LP_BLD_LERP_WIDE_NORMALIZED; + + resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, flags); + resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, flags); res = lp_build_pack2(bld->gallivm, wide_type, type, resl, resh); } else { - res = lp_build_lerp_simple(bld, x, v0, v1, FALSE); + res = lp_build_lerp_simple(bld, x, v0, v1, flags); } return res; } +/** + * Bilinear interpolation. + * + * Values indices are in v_{yx}. + */ LLVMValueRef lp_build_lerp_2d(struct lp_build_context *bld, LLVMValueRef x, @@ -1087,11 +1103,12 @@ lp_build_lerp_2d(struct lp_build_context *bld, LLVMValueRef v00, LLVMValueRef v01, LLVMValueRef v10, - LLVMValueRef v11) + LLVMValueRef v11, + unsigned flags) { - LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); - LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); - return lp_build_lerp(bld, y, v0, v1); + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01, flags); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11, flags); + return lp_build_lerp(bld, y, v0, v1, flags); } @@ -1107,11 +1124,12 @@ lp_build_lerp_3d(struct lp_build_context *bld, LLVMValueRef v100, LLVMValueRef v101, LLVMValueRef v110, - LLVMValueRef v111) + LLVMValueRef v111, + unsigned flags) { - LLVMValueRef v0 = lp_build_lerp_2d(bld, x, y, v000, v001, v010, v011); - LLVMValueRef v1 = lp_build_lerp_2d(bld, x, y, v100, v101, v110, v111); - return lp_build_lerp(bld, z, v0, v1); + LLVMValueRef v0 = lp_build_lerp_2d(bld, x, y, v000, v001, v010, v011, flags); + LLVMValueRef v1 = lp_build_lerp_2d(bld, x, y, v100, v101, v110, v111, flags); + return lp_build_lerp(bld, z, v0, v1, flags); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 45886d5fd99..966796c3c4d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -85,17 +85,27 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); + +/** + * Set when the weights for normalized are prescaled, that is, in range + * 0..2**n, as opposed to range 0..2**(n-1). + */ +#define LP_BLD_LERP_PRESCALED_WEIGHTS (1 << 0) + +/** + * Used internally when using wide intermediates for normalized lerps. + * + * Do not use. + */ +#define LP_BLD_LERP_WIDE_NORMALIZED (1 << 1) + LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, - LLVMValueRef v1); + LLVMValueRef v1, + unsigned flags); -/** - * Bilinear interpolation. - * - * Values indices are in v_{yx}. - */ LLVMValueRef lp_build_lerp_2d(struct lp_build_context *bld, LLVMValueRef x, @@ -103,7 +113,8 @@ lp_build_lerp_2d(struct lp_build_context *bld, LLVMValueRef v00, LLVMValueRef v01, LLVMValueRef v10, - LLVMValueRef v11); + LLVMValueRef v11, + unsigned flags); LLVMValueRef lp_build_lerp_3d(struct lp_build_context *bld, @@ -117,7 +128,8 @@ lp_build_lerp_3d(struct lp_build_context *bld, LLVMValueRef v100, LLVMValueRef v101, LLVMValueRef v110, - LLVMValueRef v111); + LLVMValueRef v111, + unsigned flags); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 9eaca029fda..c31b05d7022 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -496,8 +496,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef offset, LLVMValueRef x_subcoord, LLVMValueRef y_subcoord, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef *colors) { /* * Fetch the pixels as 4 x 32bit (rgba order might differ): @@ -517,10 +516,9 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, */ LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef rgba8; - struct lp_build_context h16, u8n; + struct lp_build_context u8n; LLVMTypeRef u8n_vec_type; - lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width)); lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); @@ -546,10 +544,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, y_subcoord); } - /* Expand one 4*rgba8 to two 2*rgba16 */ - lp_build_unpack2(bld->gallivm, u8n.type, h16.type, - rgba8, - colors_lo, colors_hi); + *colors = rgba8; } @@ -569,8 +564,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef r, const LLVMValueRef *offsets, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef *colors) { const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->gallivm->builder; @@ -694,7 +688,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, x_subcoord, y_subcoord, - colors_lo, colors_hi); + colors); } @@ -716,8 +710,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef r, const LLVMValueRef *offsets, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef *colors) { const unsigned dims = bld->dims; LLVMValueRef width_vec, height_vec, depth_vec; @@ -787,7 +780,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, x_subcoord, y_subcoord, - colors_lo, colors_hi); + colors); } @@ -804,29 +797,21 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, LLVMValueRef s_fpart, LLVMValueRef t_fpart, LLVMValueRef r_fpart, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef *colors) { const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->gallivm->builder; - struct lp_build_context h16, u8n; - LLVMTypeRef h16_vec_type, u8n_vec_type; + struct lp_build_context u8n; + LLVMTypeRef u8n_vec_type; LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); - LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffle_lo, shuffle_hi; - LLVMValueRef s_fpart_lo, s_fpart_hi; - LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL; - LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL; - LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */ - LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */ - LLVMValueRef packed_lo, packed_hi; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle; + LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */ + LLVMValueRef packed; unsigned i, j, k; unsigned numj, numk; - lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width)); lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); - h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type); u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); /* @@ -834,59 +819,45 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, * * s_fpart = {s0, s1, s2, s3} * - * into 8 x i16 - * - * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * where each value is between 0 and 0xff, * - * into two 8 x i16 + * into one 16 x i20 * - * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} - * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3} * * and likewise for t_fpart. There is no risk of loosing precision here * since the fractional parts only use the lower 8bits. */ - s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, ""); if (dims >= 2) - t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, ""); if (dims >= 3) - r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, ""); + r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, ""); - for (j = 0; j < h16.type.length; j += 4) { + for (j = 0; j < u8n.type.length; j += 4) { #ifdef PIPE_ARCH_LITTLE_ENDIAN unsigned subindex = 0; #else - unsigned subindex = 1; + unsigned subindex = 3; #endif LLVMValueRef index; - index = LLVMConstInt(elem_type, j/2 + subindex, 0); - for (i = 0; i < 4; ++i) - shuffles_lo[j + i] = index; - - index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + index = LLVMConstInt(elem_type, j + subindex, 0); for (i = 0; i < 4; ++i) - shuffles_hi[j + i] = index; + shuffles[j + i] = index; } - shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); - shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + shuffle = LLVMConstVector(shuffles, u8n.type.length); - s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, - shuffle_lo, ""); - s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, - shuffle_hi, ""); + s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef, + shuffle, ""); if (dims >= 2) { - t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, - shuffle_lo, ""); - t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, - shuffle_hi, ""); + t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef, + shuffle, ""); } if (dims >= 3) { - r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, - shuffle_lo, ""); - r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, - shuffle_hi, ""); + r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef, + shuffle, ""); } /* @@ -935,10 +906,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, y_subcoord[j]); } - /* Expand one 4*rgba8 to two 2*rgba16 */ - lp_build_unpack2(bld->gallivm, u8n.type, h16.type, - rgba8, - &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]); + neighbors[k][j][i] = rgba8; } } } @@ -948,84 +916,55 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, */ if (bld->static_sampler_state->force_nearest_s) { /* special case 1-D lerp */ - packed_lo = lp_build_lerp(&h16, - t_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - t_fpart_hi, - neighbors_hi[0][1][0], - neighbors_hi[0][1][0]); + packed = lp_build_lerp(&u8n, + t_fpart, + neighbors[0][0][0], + neighbors[0][0][1], + LP_BLD_LERP_PRESCALED_WEIGHTS); } else if (bld->static_sampler_state->force_nearest_t) { /* special case 1-D lerp */ - packed_lo = lp_build_lerp(&h16, - s_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - s_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1]); + packed = lp_build_lerp(&u8n, + s_fpart, + neighbors[0][0][0], + neighbors[0][0][1], + LP_BLD_LERP_PRESCALED_WEIGHTS); } else { /* general 1/2/3-D lerping */ if (dims == 1) { - packed_lo = lp_build_lerp(&h16, - s_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - s_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1]); + packed = lp_build_lerp(&u8n, + s_fpart, + neighbors[0][0][0], + neighbors[0][0][1], + LP_BLD_LERP_PRESCALED_WEIGHTS); } else if (dims == 2) { /* 2-D lerp */ - packed_lo = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1], - neighbors_lo[0][1][0], - neighbors_lo[0][1][1]); - - packed_hi = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1], - neighbors_hi[0][1][0], - neighbors_hi[0][1][1]); + packed = lp_build_lerp_2d(&u8n, + s_fpart, t_fpart, + neighbors[0][0][0], + neighbors[0][0][1], + neighbors[0][1][0], + neighbors[0][1][1], + LP_BLD_LERP_PRESCALED_WEIGHTS); } else { /* 3-D lerp */ assert(dims == 3); - packed_lo = lp_build_lerp_3d(&h16, - s_fpart_lo, t_fpart_lo, r_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1], - neighbors_lo[0][1][0], - neighbors_lo[0][1][1], - neighbors_lo[1][0][0], - neighbors_lo[1][0][1], - neighbors_lo[1][1][0], - neighbors_lo[1][1][1]); - - packed_hi = lp_build_lerp_3d(&h16, - s_fpart_hi, t_fpart_hi, r_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1], - neighbors_hi[0][1][0], - neighbors_hi[0][1][1], - neighbors_hi[1][0][0], - neighbors_hi[1][0][1], - neighbors_hi[1][1][0], - neighbors_hi[1][1][1]); + packed = lp_build_lerp_3d(&u8n, + s_fpart, t_fpart, r_fpart, + neighbors[0][0][0], + neighbors[0][0][1], + neighbors[0][1][0], + neighbors[0][1][1], + neighbors[1][0][0], + neighbors[1][0][1], + neighbors[1][1][0], + neighbors[1][1][1], + LP_BLD_LERP_PRESCALED_WEIGHTS); } } - *colors_lo = packed_lo; - *colors_hi = packed_hi; + *colors = packed; } /** @@ -1043,8 +982,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef r, const LLVMValueRef *offsets, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef *colors) { const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->gallivm->builder; @@ -1223,7 +1161,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_sample_fetch_image_linear(bld, data_ptr, offset, x_subcoord, y_subcoord, s_fpart, t_fpart, r_fpart, - colors_lo, colors_hi); + colors); } @@ -1244,8 +1182,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef r, const LLVMValueRef *offsets, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef *colors) { const unsigned dims = bld->dims; LLVMValueRef width_vec, height_vec, depth_vec; @@ -1395,7 +1332,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, lp_build_sample_fetch_image_linear(bld, data_ptr, offset, x_subcoord, y_subcoord, s_fpart, t_fpart, r_fpart, - colors_lo, colors_hi); + colors); } @@ -1416,8 +1353,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef ilevel0, LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef colors_lo_var, - LLVMValueRef colors_hi_var) + LLVMValueRef colors_var) { LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef size0; @@ -1430,8 +1366,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef data_ptr1; LLVMValueRef mipoff0 = NULL; LLVMValueRef mipoff1 = NULL; - LLVMValueRef colors0_lo, colors0_hi; - LLVMValueRef colors1_lo, colors1_hi; + LLVMValueRef colors0; + LLVMValueRef colors1; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1452,7 +1388,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, mipoff0, s, t, r, offsets, - &colors0_lo, &colors0_hi); + &colors0); } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); @@ -1460,7 +1396,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, mipoff0, s, t, r, offsets, - &colors0_lo, &colors0_hi); + &colors0); } } else { @@ -1469,7 +1405,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, mipoff0, s, t, r, offsets, - &colors0_lo, &colors0_hi); + &colors0); } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); @@ -1477,13 +1413,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, mipoff0, s, t, r, offsets, - &colors0_lo, &colors0_hi); + &colors0); } } /* Store the first level's colors in the output variables */ - LLVMBuildStore(builder, colors0_lo, colors_lo_var); - LLVMBuildStore(builder, colors0_hi, colors_hi_var); + LLVMBuildStore(builder, colors0, colors_var); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm, @@ -1522,9 +1457,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_if(&if_ctx, bld->gallivm, need_lerp); { - struct lp_build_context h16_bld; + struct lp_build_context u8n_bld; - lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width)); + lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width)); /* sample the second mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel1, @@ -1547,14 +1482,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, s, t, r, offsets, - &colors1_lo, &colors1_hi); + &colors1); } else { lp_build_sample_image_linear_afloat(bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, s, t, r, offsets, - &colors1_lo, &colors1_hi); + &colors1); } } else { @@ -1563,73 +1498,55 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, s, t, r, offsets, - &colors1_lo, &colors1_hi); + &colors1); } else { lp_build_sample_image_linear(bld, size1, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, s, t, r, offsets, - &colors1_lo, &colors1_hi); + &colors1); } } /* interpolate samples from the two mipmap levels */ if (num_quads == 1) { - lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); - lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, ""); + lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart); #if HAVE_LLVM == 0x208 - /* This is a work-around for a bug in LLVM 2.8. + /* This was a work-around for a bug in LLVM 2.8. * Evidently, something goes wrong in the construction of the * lod_fpart short[8] vector. Adding this no-effect shuffle seems * to force the vector to be properly constructed. * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). */ - { - LLVMValueRef shuffles[8], shuffle; - assert(h16_bld.type.length <= Elements(shuffles)); - for (i = 0; i < h16_bld.type.length; i++) - shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1)); - shuffle = LLVMConstVector(shuffles, h16_bld.type.length); - lod_fpart = LLVMBuildShuffleVector(builder, - lod_fpart, lod_fpart, - shuffle, ""); - } +#error Unsupported #endif - - colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, - colors0_lo, colors1_lo); - colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, - colors0_hi, colors1_hi); } else { - LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16]; - struct lp_type perquadi16_type = bld->perquadi_bld.type; - perquadi16_type.width /= 2; - perquadi16_type.length *= 2; - lod_fpart = LLVMBuildBitCast(builder, lod_fpart, - lp_build_vec_type(bld->gallivm, - perquadi16_type), ""); - /* XXX this only works for exactly 2 quads. More quads need shuffle */ - assert(num_quads == 2); - for (i = 0; i < num_quads; i++) { - LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2); - lod_parts[i] = lp_build_extract_broadcast(bld->gallivm, - perquadi16_type, - h16_bld.type, - lod_fpart, - indexi2); + const unsigned num_chans_per_quad = 4 * 4; + LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->perquadi_bld.type.length); + LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH]; + + /* Take the LSB of lod_fpart */ + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, ""); + + /* Broadcast each lod weight into their respective channels */ + assert(u8n_bld.type.length == num_quads * num_chans_per_quad); + for (i = 0; i < u8n_bld.type.length; ++i) { + shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_quad); } - colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0], - colors0_lo, colors1_lo); - colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1], - colors0_hi, colors1_hi); + lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type), + LLVMConstVector(shuffle, u8n_bld.type.length), ""); } - LLVMBuildStore(builder, colors0_lo, colors_lo_var); - LLVMBuildStore(builder, colors0_hi, colors_hi_var); + colors0 = lp_build_lerp(&u8n_bld, lod_fpart, + colors0, colors1, + LP_BLD_LERP_PRESCALED_WEIGHTS); + + LLVMBuildStore(builder, colors0, colors_var); } lp_build_endif(&if_ctx); } @@ -1661,9 +1578,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const unsigned min_filter = bld->static_sampler_state->min_img_filter; const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; const unsigned dims = bld->dims; - LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef packed_var, packed; LLVMValueRef unswizzled[4]; - struct lp_build_context h16_bld; + struct lp_build_context u8n_bld; /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s)); @@ -1673,15 +1590,14 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r)); - /* make 16-bit fixed-pt builder context */ - lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width)); + /* make 8-bit unorm builder context */ + lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width)); /* * Get/interpolate texture colors. */ - packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo"); - packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi"); + packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var"); if (min_filter == mag_filter) { /* no need to distinguish between minification and magnification */ @@ -1689,7 +1605,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, min_filter, mip_filter, s, t, r, offsets, ilevel0, ilevel1, lod_fpart, - packed_lo, packed_hi); + packed_var); } else { /* Emit conditional to choose min image filter or mag image filter @@ -1722,7 +1638,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, min_filter, mip_filter, s, t, r, offsets, ilevel0, ilevel1, lod_fpart, - packed_lo, packed_hi); + packed_var); } lp_build_else(&if_ctx); { @@ -1731,19 +1647,12 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, s, t, r, offsets, ilevel0, NULL, NULL, - packed_lo, packed_hi); + packed_var); } lp_build_endif(&if_ctx); } - /* - * combine the values stored in 'packed_lo' and 'packed_hi' variables - * into 'packed' - */ - packed = lp_build_pack2(bld->gallivm, - h16_bld.type, lp_type_unorm(8, bld->vector_width), - LLVMBuildLoad(builder, packed_lo, ""), - LLVMBuildLoad(builder, packed_hi, "")); + packed = LLVMBuildLoad(builder, packed_var, ""); /* * Convert to SoA and swizzle. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index beefdaed513..cc29c5c885f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -822,7 +822,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, for (chan = 0; chan < 4; chan++) { colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, neighbors[0][0][chan], - neighbors[0][1][chan]); + neighbors[0][1][chan], + 0); } } else { @@ -848,7 +849,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, neighbors[0][0][chan], neighbors[0][1][chan], neighbors[1][0][chan], - neighbors[1][1][chan]); + neighbors[1][1][chan], + 0); } if (dims == 3) { @@ -884,14 +886,16 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, neighbors1[0][0][chan], neighbors1[0][1][chan], neighbors1[1][0][chan], - neighbors1[1][1][chan]); + neighbors1[1][1][chan], + 0); } /* Linearly interpolate the two samples from the two 3D slices */ for (chan = 0; chan < 4; chan++) { colors_out[chan] = lp_build_lerp(&bld->texel_bld, r_fpart, - colors0[chan], colors1[chan]); + colors0[chan], colors1[chan], + 0); } } else { @@ -1038,7 +1042,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, for (chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, - colors0[chan], colors1[chan]); + colors0[chan], colors1[chan], + 0); LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } } |