summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 164
1 files changed, 151 insertions, 13 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 8e2d0d9f33b..2d833318aee 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -822,6 +822,12 @@ lp_build_masklerp2d(struct lp_build_context *bld,
return lp_build_lerp(bld, weight1, val0, val1, 0);
}
+/*
+ * This is a rather large amount of code for something OpenGL only
+ * recommends but does not require.
+ */
+#define ACCURATE_CUBE_CORNERS 1
+
/**
* Generate code to sample a mipmap level with linear filtering.
* If sampling a cube texture, r = cube face in [0,5].
@@ -840,6 +846,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
const LLVMValueRef *offsets,
LLVMValueRef colors_out[4])
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ struct lp_build_context *ivec_bld = &bld->int_coord_bld;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
const unsigned dims = bld->dims;
LLVMValueRef width_vec;
LLVMValueRef height_vec;
@@ -848,6 +857,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef flt_width_vec;
LLVMValueRef flt_height_vec;
LLVMValueRef flt_depth_vec;
+ LLVMValueRef fall_off[4], have_corners;
LLVMValueRef z1 = NULL;
LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
@@ -856,6 +866,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef xs[4], ys[4], zs[4];
LLVMValueRef neighbors[2][2][4];
int chan, texel_index;
+ boolean seamless_cube_filter, accurate_cube_corners;
+
+ seamless_cube_filter = bld->static_texture_state->target == PIPE_TEXTURE_CUBE &&
+ bld->static_sampler_state->seamless_cube_map;
+ accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter;
lp_build_extract_image_sizes(bld,
&bld->int_size_bld,
@@ -875,8 +890,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
* Compute integer texcoords.
*/
- if (bld->static_texture_state->target != PIPE_TEXTURE_CUBE ||
- !bld->static_sampler_state->seamless_cube_map) {
+ if (!seamless_cube_filter) {
lp_build_sample_wrap_linear(bld, coords[0], width_vec,
flt_width_vec, offsets[0],
bld->static_texture_state->pot_width,
@@ -918,13 +932,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
}
else {
- LLVMBuilderRef builder = bld->gallivm->builder;
- struct lp_build_context *ivec_bld = &bld->int_coord_bld;
- struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_if_state edge_if;
+ LLVMTypeRef int1t;
LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
- LLVMValueRef fall_off[4], coord, have_edge;
- LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp;
+ LLVMValueRef coord, have_edge, have_corner;
+ LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
LLVMValueRef face = coords[2];
@@ -957,12 +969,15 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
- have_edge = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
- have_edge = lp_build_or(ivec_bld, have_edge, fall_off[2]);
- have_edge = lp_build_or(ivec_bld, have_edge, fall_off[3]);
-
+ fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
+ fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
+ have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
+ /* needed for the accurate corner filtering branch later; relies on the alloca being 0-initialized */
+ int1t = LLVMInt1TypeInContext(bld->gallivm->context);
+ have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
+
for (texel_index = 0; texel_index < 4; texel_index++) {
xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
@@ -971,6 +986,10 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
lp_build_if(&edge_if, bld->gallivm, have_edge);
+ have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
+ have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
+ LLVMBuildStore(builder, have_corner, have_corners);
+
/*
* Need to feed clamped values here for cheap corner handling,
* but only for y coord (as when falling off both edges we only
@@ -1074,7 +1093,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
if (linear_mask) {
/*
- * Whack filter weights into place. Whatever pixel had more weight is
+ * Whack filter weights into place. Whatever texel had more weight is
* the one which should have been selected by nearest filtering hence
* just use 100% weight for it.
*/
@@ -1135,7 +1154,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
else {
/* 2D/3D texture */
- LLVMValueRef colors0[4];
+ struct lp_build_if_state corner_if;
+ LLVMValueRef colors0[4], colorss[4];
/* get x0/x1 texels at y1 */
lp_build_sample_texel_soa(bld,
@@ -1149,6 +1169,110 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[1][1]);
+ /*
+ * To avoid having to duplicate the linear_mask / fetch code, use
+ * another branch here (on the corner condition, though the edge
+ * condition would work as well).
+ */
+ if (accurate_cube_corners) {
+ LLVMValueRef w00, w01, w10, w11, wx0, wy0;
+ LLVMValueRef c_weight, c00, c01, c10, c11;
+ LLVMValueRef have_corner, one_third, tmp;
+
+ colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+
+ have_corner = LLVMBuildLoad(builder, have_corners, "");
+
+ lp_build_if(&corner_if, bld->gallivm, have_corner);
+
+ /*
+ * We can't use the standard 2d lerp as we need per-element weights
+ * in case of corners, so just calculate the bilinear result as
+ * w00*s00 + w01*s01 + w10*s10 + w11*s11.
+ * (This is actually less work than using 2d lerp, 7 vs. 9 instructions;
+ * however, calculating the weights needs another 6, so overall it is
+ * probably no slower than 2d lerp only for 4 channels, since the weights
+ * only need to be calculated once - of course fixing the weights has additional cost.)
+ */
+ wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
+ wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
+ w00 = lp_build_mul(coord_bld, wx0, wy0);
+ w01 = lp_build_mul(coord_bld, s_fpart, wy0);
+ w10 = lp_build_mul(coord_bld, wx0, t_fpart);
+ w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
+
+ /* find corner weight */
+ c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
+ c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+ c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
+ c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
+ c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
+ c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
+ c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
+ c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
+
+ /*
+ * add 1/3 of the corner weight to each of the 3 other samples
+ * and null out corner weight
+ */
+ one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
+ c_weight = lp_build_mul(coord_bld, c_weight, one_third);
+ w00 = lp_build_add(coord_bld, w00, c_weight);
+ c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
+ w00 = lp_build_andnot(coord_bld, w00, c00);
+ w01 = lp_build_add(coord_bld, w01, c_weight);
+ c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
+ w01 = lp_build_andnot(coord_bld, w01, c01);
+ w10 = lp_build_add(coord_bld, w10, c_weight);
+ c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
+ w10 = lp_build_andnot(coord_bld, w10, c10);
+ w11 = lp_build_add(coord_bld, w11, c_weight);
+ c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
+ w11 = lp_build_andnot(coord_bld, w11, c11);
+
+ if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
+ tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ }
+ }
+ else {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
+ /* inputs to interpolation are just masks so just add masked weights together */
+ cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
+ cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
+ cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
+ cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
+ colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
+ tmp = lp_build_and(coord_bld, w01, cmpval01);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w10, cmpval10);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w11, cmpval11);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ colors0[1] = colors0[2] = colors0[3] = colors0[0];
+ }
+
+ LLVMBuildStore(builder, colors0[0], colorss[0]);
+ LLVMBuildStore(builder, colors0[1], colorss[1]);
+ LLVMBuildStore(builder, colors0[2], colorss[2]);
+ LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+ lp_build_else(&corner_if);
+ }
+
if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
/* Bilinear interpolate the four samples from the 2D image / 3D slice */
for (chan = 0; chan < 4; chan++) {
@@ -1172,6 +1296,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
colors0[1] = colors0[2] = colors0[3] = colors0[0];
}
+ if (accurate_cube_corners) {
+ LLVMBuildStore(builder, colors0[0], colorss[0]);
+ LLVMBuildStore(builder, colors0[1], colorss[1]);
+ LLVMBuildStore(builder, colors0[2], colorss[2]);
+ LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+ lp_build_endif(&corner_if);
+
+ colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
+ colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
+ colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
+ colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
+ }
+
if (dims == 3) {
LLVMValueRef neighbors1[2][2][4];
LLVMValueRef colors1[4];