summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c138
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h13
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c257
3 files changed, 368 insertions, 40 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 1c352006f3e..a032d9d6895 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -1402,6 +1402,144 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
}
}
+/**
+ * Generate new coords and faces for cubemap texels falling off the face.
+ *
+ * @param face face (center) of the pixel
+ * @param x0 lower x coord
+ * @param x1 higher x coord (must be x0 + 1)
+ * @param y0 lower y coord
+ * @param y1 higher y coord (must be y0 + 1)
+ * @param max_coord texture cube (level) size - 1
+ * @param next_faces new face values when falling off
+ * @param next_xcoords new x coord values when falling off
+ * @param next_ycoords new y coord values when falling off
+ *
+ * The arrays hold the new values when under/overflow of
+ * lower x, higher x, lower y, higher y coord would occur (in this order).
+ * next_xcoords/next_ycoords have two entries each (for both new lower and
+ * higher coord).
+ */
+void
+lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
+ LLVMValueRef face,
+ LLVMValueRef x0,
+ LLVMValueRef x1,
+ LLVMValueRef y0,
+ LLVMValueRef y1,
+ LLVMValueRef max_coord,
+ LLVMValueRef next_faces[4],
+ LLVMValueRef next_xcoords[4][2],
+ LLVMValueRef next_ycoords[4][2])
+{
+ /*
+ * Lookup tables aren't nice for simd code hence try some logic here.
+ * (Note that while it would not be necessary to do per-sample (4) lookups
+ * when using a LUT as it's impossible that texels fall off of positive
+ * and negative edges simultaneously, it would however be necessary to
+ * do 2 lookups for corner handling as in this case texels both fall off
+ * of x and y axes.)
+ */
+ /*
+ * Next faces (for face 012345):
+ * x < 0.0 : 451110
+ * x >= 1.0 : 540001
+ * y < 0.0 : 225422
+ * y >= 1.0 : 334533
+ * Hence nfx+ (and nfy-) == nfx- (nfy+) xor 1
+ * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + (face & 1))
+ * nfy+: (face & ~4) > 1 ? face + 2 : 3;
+ * This could also use pshufb instead, but would need (manually coded)
+ * ssse3 intrinsic (llvm won't do non-constant shuffles).
+ */
+ struct gallivm_state *gallivm = ivec_bld->gallivm;
+ LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
+ LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
+ LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
+ LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
+ LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
+ LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
+
+ sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
+ tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
+ sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
+ faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
+ tmp = lp_build_add(ivec_bld, faceand1, c4);
+ next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
+ next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
+
+ tmp = lp_build_andnot(ivec_bld, face, c4);
+ sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
+ tmp = lp_build_add(ivec_bld, face, c2);
+ next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
+ next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
+
+ /*
+ * new xcoords (for face 012345):
+ * x < 0.0 : max max t max-t max max
+ * x >= 1.0 : 0 0 max-t t 0 0
+ * y < 0.0 : max 0 max-s s s max-s
+ * y >= 1.0 : max 0 s max-s s max-s
+ *
+ * ncx[1] = (face & ~4) > 1 ? (face == 2 ? max-t : t) : 0
+ * ncx[0] = max - ncx[1]
+ * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
+ * ncx[2] = (face & ~4) > 1 ? max - ncx[3] : ncx[3]
+ */
+ sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
+ maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
+ tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
+ next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
+ next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
+ maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
+ tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
+ next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
+ next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
+
+ sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
+
+ tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
+ maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
+ tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
+ next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
+ tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
+ next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
+ maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
+ tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
+ next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
+ tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
+ next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
+
+ /*
+ * new ycoords (for face 012345):
+ * x < 0.0 : t t 0 max t t
+ * x >= 1.0 : t t 0 max t t
+ * y < 0.0 : max-s s 0 max max 0
+ * y >= 1.0 : s max-s 0 max 0 max
+ *
+ * ncy[0] = (face & ~4) > 1 ? (face == 2 ? 0 : max) : t
+ * ncy[1] = ncy[0]
+ * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : s
+ * ncy[2] = (face & ~4) > 1 ? ncy[3] : max - ncy[3]
+ */
+ tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
+ next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
+ next_ycoords[1][0] = next_ycoords[0][0];
+ next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
+ next_ycoords[1][1] = next_ycoords[0][1];
+
+ tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
+ tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
+ next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
+ tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
+ next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
+ tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
+ tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
+ next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
+ tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
+ next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
+}
+
/** Helper used by lp_build_cube_lookup() */
static LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 70f03503f0f..5039128a203 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -464,6 +464,19 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
void
+lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
+ LLVMValueRef face,
+ LLVMValueRef x0,
+ LLVMValueRef x1,
+ LLVMValueRef y0,
+ LLVMValueRef y1,
+ LLVMValueRef max_coord,
+ LLVMValueRef new_faces[4],
+ LLVMValueRef new_xcoords[4][2],
+ LLVMValueRef new_ycoords[4][2]);
+
+
+void
lp_build_sample_partial_offset(struct lp_build_context *bld,
unsigned block_length,
LLVMValueRef coord,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 54dee25bfd9..8e2d0d9f33b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -848,10 +848,14 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef flt_width_vec;
LLVMValueRef flt_height_vec;
LLVMValueRef flt_depth_vec;
- LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL;
+ LLVMValueRef z1 = NULL;
+ LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
+ LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
+ LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
+ LLVMValueRef xs[4], ys[4], zs[4];
LLVMValueRef neighbors[2][2][4];
- int chan;
+ int chan, texel_index;
lp_build_extract_image_sizes(bld,
&bld->int_size_bld,
@@ -870,39 +874,202 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/*
* Compute integer texcoords.
*/
- lp_build_sample_wrap_linear(bld, coords[0], width_vec,
- flt_width_vec, offsets[0],
- bld->static_texture_state->pot_width,
- bld->static_sampler_state->wrap_s,
- &x0, &x1, &s_fpart);
- lp_build_name(x0, "tex.x0.wrapped");
- lp_build_name(x1, "tex.x1.wrapped");
- if (dims >= 2) {
- lp_build_sample_wrap_linear(bld, coords[1], height_vec,
- flt_height_vec, offsets[1],
- bld->static_texture_state->pot_height,
- bld->static_sampler_state->wrap_t,
- &y0, &y1, &t_fpart);
- lp_build_name(y0, "tex.y0.wrapped");
- lp_build_name(y1, "tex.y1.wrapped");
+ if (bld->static_texture_state->target != PIPE_TEXTURE_CUBE ||
+ !bld->static_sampler_state->seamless_cube_map) {
+ lp_build_sample_wrap_linear(bld, coords[0], width_vec,
+ flt_width_vec, offsets[0],
+ bld->static_texture_state->pot_width,
+ bld->static_sampler_state->wrap_s,
+ &x00, &x01, &s_fpart);
+ lp_build_name(x00, "tex.x0.wrapped");
+ lp_build_name(x01, "tex.x1.wrapped");
+ x10 = x00;
+ x11 = x01;
- if (dims == 3) {
- lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
- flt_depth_vec, offsets[2],
- bld->static_texture_state->pot_depth,
- bld->static_sampler_state->wrap_r,
- &z0, &z1, &r_fpart);
- lp_build_name(z0, "tex.z0.wrapped");
- lp_build_name(z1, "tex.z1.wrapped");
+ if (dims >= 2) {
+ lp_build_sample_wrap_linear(bld, coords[1], height_vec,
+ flt_height_vec, offsets[1],
+ bld->static_texture_state->pot_height,
+ bld->static_sampler_state->wrap_t,
+ &y00, &y10, &t_fpart);
+ lp_build_name(y00, "tex.y0.wrapped");
+ lp_build_name(y10, "tex.y1.wrapped");
+ y01 = y00;
+ y11 = y10;
+
+ if (dims == 3) {
+ lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
+ flt_depth_vec, offsets[2],
+ bld->static_texture_state->pot_depth,
+ bld->static_sampler_state->wrap_r,
+ &z00, &z1, &r_fpart);
+ z01 = z10 = z11 = z00;
+ lp_build_name(z00, "tex.z0.wrapped");
+ lp_build_name(z1, "tex.z1.wrapped");
+ }
+ }
+ if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
+ bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
+ bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
+ z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
+ lp_build_name(z00, "tex.z0.layer");
+ lp_build_name(z1, "tex.z1.layer");
}
}
- if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
- bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
- bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
- z0 = z1 = coords[2]; /* cube face or layer */
- lp_build_name(z0, "tex.z0.layer");
- lp_build_name(z1, "tex.z1.layer");
+ else {
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ struct lp_build_context *ivec_bld = &bld->int_coord_bld;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ struct lp_build_if_state edge_if;
+ LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
+ LLVMValueRef fall_off[4], coord, have_edge;
+ LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp;
+ LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
+ LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
+ LLVMValueRef face = coords[2];
+ LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
+ LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
+ /* XXX drop height calcs. Could (should) do this without seamless filtering too */
+ height_vec = width_vec;
+ flt_height_vec = flt_width_vec;
+
+ /* XXX the overflow logic is actually sort of duplicated with trilinear,
+ * since an overflow in one mip should also have a corresponding overflow
+ * in another.
+ */
+ /* should always have normalized coords, and offsets are undefined */
+ assert(bld->static_sampler_state->normalized_coords);
+ coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
+ /* instead of clamp, build mask if overflowed */
+ coord = lp_build_sub(coord_bld, coord, half);
+ /* convert to int, compute lerp weight */
+ /* not ideal with AVX (and no AVX2) */
+ lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
+ x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
+ coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
+ coord = lp_build_sub(coord_bld, coord, half);
+ lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
+ y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
+
+ fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
+ fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
+ fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
+ fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
+
+ have_edge = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
+ have_edge = lp_build_or(ivec_bld, have_edge, fall_off[2]);
+ have_edge = lp_build_or(ivec_bld, have_edge, fall_off[3]);
+
+ have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
+
+ for (texel_index = 0; texel_index < 4; texel_index++) {
+ xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
+ ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
+ zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
+ }
+
+ lp_build_if(&edge_if, bld->gallivm, have_edge);
+
+ /*
+ * Need to feed clamped values here for cheap corner handling,
+ * but only for y coord (as when falling off both edges we only
+ * fall off the x one) - this should be sufficient.
+ */
+ y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
+ y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
+
+ /*
+ * Get all possible new coords.
+ */
+ lp_build_cube_new_coords(ivec_bld, face,
+ x0, x1, y0_clamped, y1_clamped,
+ length_minus_one,
+ new_faces, new_xcoords, new_ycoords);
+
+ /* handle fall off x-, x+ direction */
+ /* determine new coords, face (both fall_off vars cannot be true at the same time) */
+ x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
+ y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
+ x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
+ y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
+ x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
+ y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
+ x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
+ y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
+
+ z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
+ z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
+
+ /* handle fall off y-, y+ direction */
+ /*
+ * Cheap corner logic: just hack up things so a texel doesn't fall
+ * off both sides (which means filter weights will be wrong but we'll only
+ * use valid texels in the filter).
+ * This means however (y) coords must additionally be clamped (see above).
+ * This corner handling should be fully OpenGL (but not d3d10) compliant.
+ */
+ fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
+ fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
+ fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
+ fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
+
+ x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
+ y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
+ x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
+ y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
+ x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
+ y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
+ x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
+ y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
+
+ z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
+ z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
+ z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
+ z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
+
+ LLVMBuildStore(builder, x00, xs[0]);
+ LLVMBuildStore(builder, x01, xs[1]);
+ LLVMBuildStore(builder, x10, xs[2]);
+ LLVMBuildStore(builder, x11, xs[3]);
+ LLVMBuildStore(builder, y00, ys[0]);
+ LLVMBuildStore(builder, y01, ys[1]);
+ LLVMBuildStore(builder, y10, ys[2]);
+ LLVMBuildStore(builder, y11, ys[3]);
+ LLVMBuildStore(builder, z00, zs[0]);
+ LLVMBuildStore(builder, z01, zs[1]);
+ LLVMBuildStore(builder, z10, zs[2]);
+ LLVMBuildStore(builder, z11, zs[3]);
+
+ lp_build_else(&edge_if);
+
+ LLVMBuildStore(builder, x0, xs[0]);
+ LLVMBuildStore(builder, x1, xs[1]);
+ LLVMBuildStore(builder, x0, xs[2]);
+ LLVMBuildStore(builder, x1, xs[3]);
+ LLVMBuildStore(builder, y0, ys[0]);
+ LLVMBuildStore(builder, y0, ys[1]);
+ LLVMBuildStore(builder, y1, ys[2]);
+ LLVMBuildStore(builder, y1, ys[3]);
+ LLVMBuildStore(builder, face, zs[0]);
+ LLVMBuildStore(builder, face, zs[1]);
+ LLVMBuildStore(builder, face, zs[2]);
+ LLVMBuildStore(builder, face, zs[3]);
+
+ lp_build_endif(&edge_if);
+
+ x00 = LLVMBuildLoad(builder, xs[0], "");
+ x01 = LLVMBuildLoad(builder, xs[1], "");
+ x10 = LLVMBuildLoad(builder, xs[2], "");
+ x11 = LLVMBuildLoad(builder, xs[3], "");
+ y00 = LLVMBuildLoad(builder, ys[0], "");
+ y01 = LLVMBuildLoad(builder, ys[1], "");
+ y10 = LLVMBuildLoad(builder, ys[2], "");
+ y11 = LLVMBuildLoad(builder, ys[3], "");
+ z00 = LLVMBuildLoad(builder, zs[0], "");
+ z01 = LLVMBuildLoad(builder, zs[1], "");
+ z10 = LLVMBuildLoad(builder, zs[2], "");
+ z11 = LLVMBuildLoad(builder, zs[3], "");
}
if (linear_mask) {
@@ -937,12 +1104,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/* get x0/x1 texels */
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x0, y0, z0,
+ x00, y00, z00,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[0][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x1, y0, z0,
+ x01, y01, z01,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[0][1]);
@@ -973,12 +1140,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/* get x0/x1 texels at y1 */
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x0, y1, z0,
+ x10, y10, z10,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[1][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x1, y1, z0,
+ x11, y11, z11,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[1][1]);
@@ -1012,22 +1179,22 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/* get x0/x1/y0/y1 texels at z1 */
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x0, y0, z1,
+ x00, y00, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[0][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x1, y0, z1,
+ x01, y01, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[0][1]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x0, y1, z1,
+ x10, y10, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[1][0]);
lp_build_sample_texel_soa(bld,
width_vec, height_vec, depth_vec,
- x1, y1, z1,
+ x11, y11, z1,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[1][1]);
@@ -2306,15 +2473,25 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
}
}
+ if (static_texture_state->target == PIPE_TEXTURE_CUBE &&
+ derived_sampler_state.seamless_cube_map &&
+ (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
+ derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
+ /* theoretically possible with AoS filtering but not implemented (complex!) */
+ use_aos = 0;
+ }
if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
!use_aos && util_format_fits_8unorm(bld.format_desc)) {
debug_printf("%s: using floating point linear filtering for %s\n",
__FUNCTION__, bld.format_desc->short_name);
- debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d wrapr %d\n",
+ debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
+ " wraps %d wrapt %d wrapr %d\n",
derived_sampler_state.min_img_filter,
derived_sampler_state.mag_img_filter,
derived_sampler_state.min_mip_filter,
+ static_texture_state->target,
+ derived_sampler_state.seamless_cube_map,
derived_sampler_state.wrap_s,
derived_sampler_state.wrap_t,
derived_sampler_state.wrap_r);