diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 138 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.h | 13 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 257 |
3 files changed, 368 insertions, 40 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 1c352006f3e..a032d9d6895 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -1402,6 +1402,144 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld, } } +/** + * Generate new coords and faces for cubemap texels falling off the face. + * + * @param face face (center) of the pixel + * @param x0 lower x coord + * @param x1 higher x coord (must be x0 + 1) + * @param y0 lower y coord + * @param y1 higher y coord (must be y0 + 1) + * @param max_coord texture cube (level) size - 1 + * @param next_faces new face values when falling off + * @param next_xcoords new x coord values when falling off + * @param next_ycoords new y coord values when falling off + * + * The arrays hold the new values when under/overflow of + * lower x, higher x, lower y, higher y coord would occur (in this order). + * next_xcoords/next_ycoords have two entries each (for both new lower and + * higher coord). + */ +void +lp_build_cube_new_coords(struct lp_build_context *ivec_bld, + LLVMValueRef face, + LLVMValueRef x0, + LLVMValueRef x1, + LLVMValueRef y0, + LLVMValueRef y1, + LLVMValueRef max_coord, + LLVMValueRef next_faces[4], + LLVMValueRef next_xcoords[4][2], + LLVMValueRef next_ycoords[4][2]) +{ + /* + * Lookup tables aren't nice for simd code hence try some logic here. + * (Note that while it would not be necessary to do per-sample (4) lookups + * when using a LUT as it's impossible that texels fall off of positive + * and negative edges simultaneously, it would however be necessary to + * do 2 lookups for corner handling as in this case texels both fall off + * of x and y axes.) + */ + /* + * Next faces (for face 012345): + * x < 0.0 : 451110 + * x >= 1.0 : 540001 + * y < 0.0 : 225422 + * y >= 1.0 : 334533 + * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1 + * nfx-: face > 1 ? (face == 5 ? 
0 : 1) : (4 + (face & 1)) + * nfy+: face & ~4 > 1 ? face + 2 : 3; + * This could also use pshufb instead, but would need (manually coded) + * ssse3 intrinsic (llvm won't do non-constant shuffles). + */ + struct gallivm_state *gallivm = ivec_bld->gallivm; + LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp; + LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1; + LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2); + LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3); + LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4); + LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5); + + sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5); + tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one); + sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one); + faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one); + tmp = lp_build_add(ivec_bld, faceand1, c4); + next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp); + next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one); + + tmp = lp_build_andnot(ivec_bld, face, c4); + sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one); + tmp = lp_build_add(ivec_bld, face, c2); + next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3); + next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one); + + /* + * new xcoords (for face 012345): + * x < 0.0 : max max t max-t max max + * x >= 1.0 : 0 0 max-t t 0 0 + * y < 0.0 : max 0 max-s s s max-s + * y >= 1.0 : max 0 s max-s s max-s + * + * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0 + * ncx[0] = max - ncx[1] + * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max + * ncx[2] = face & ~4 > 1 ? 
max - ncx[3] : ncx[3] + */ + sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2); + maxmy0 = lp_build_sub(ivec_bld, max_coord, y0); + tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0); + next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero); + next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]); + maxmy1 = lp_build_sub(ivec_bld, max_coord, y1); + tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1); + next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero); + next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]); + + sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one); + + tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord); + maxmx0 = lp_build_sub(ivec_bld, max_coord, x0); + tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0); + next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel); + tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]); + next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]); + maxmx1 = lp_build_sub(ivec_bld, max_coord, x1); + tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1); + next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel); + tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]); + next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]); + + /* + * new ycoords (for face 012345): + * x < 0.0 : t t 0 max t t + * x >= 1.0 : t t 0 max t t + * y < 0.0 : max-s s 0 max max 0 + * y >= 1.0 : s max-s 0 max 0 max + * + * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t + * ncy[1] = ncy[0] + * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : s + * ncy[2] = face & ~4 > 1 ? 
ncy[3] : max - ncy[3] + */ + tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord); + next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0); + next_ycoords[1][0] = next_ycoords[0][0]; + next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1); + next_ycoords[1][1] = next_ycoords[0][1]; + + tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0); + tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero); + next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel); + tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]); + next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp); + tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1); + tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero); + next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel); + tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]); + next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp); +} + /** Helper used by lp_build_cube_lookup() */ static LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 70f03503f0f..5039128a203 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -464,6 +464,19 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, void +lp_build_cube_new_coords(struct lp_build_context *ivec_bld, + LLVMValueRef face, + LLVMValueRef x0, + LLVMValueRef x1, + LLVMValueRef y0, + LLVMValueRef y1, + LLVMValueRef max_coord, + LLVMValueRef new_faces[4], + LLVMValueRef new_xcoords[4][2], + LLVMValueRef new_ycoords[4][2]); + + +void lp_build_sample_partial_offset(struct lp_build_context *bld, unsigned block_length, LLVMValueRef coord, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 54dee25bfd9..8e2d0d9f33b 100644 --- 
a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -848,10 +848,14 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef flt_width_vec; LLVMValueRef flt_height_vec; LLVMValueRef flt_depth_vec; - LLVMValueRef x0, y0 = NULL, z0 = NULL, x1, y1 = NULL, z1 = NULL; + LLVMValueRef z1 = NULL; + LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL; + LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL; + LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL; LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; + LLVMValueRef xs[4], ys[4], zs[4]; LLVMValueRef neighbors[2][2][4]; - int chan; + int chan, texel_index; lp_build_extract_image_sizes(bld, &bld->int_size_bld, @@ -870,39 +874,202 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* * Compute integer texcoords. */ - lp_build_sample_wrap_linear(bld, coords[0], width_vec, - flt_width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x0, &x1, &s_fpart); - lp_build_name(x0, "tex.x0.wrapped"); - lp_build_name(x1, "tex.x1.wrapped"); - if (dims >= 2) { - lp_build_sample_wrap_linear(bld, coords[1], height_vec, - flt_height_vec, offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y0, &y1, &t_fpart); - lp_build_name(y0, "tex.y0.wrapped"); - lp_build_name(y1, "tex.y1.wrapped"); + if (bld->static_texture_state->target != PIPE_TEXTURE_CUBE || + !bld->static_sampler_state->seamless_cube_map) { + lp_build_sample_wrap_linear(bld, coords[0], width_vec, + flt_width_vec, offsets[0], + bld->static_texture_state->pot_width, + bld->static_sampler_state->wrap_s, + &x00, &x01, &s_fpart); + lp_build_name(x00, "tex.x0.wrapped"); + lp_build_name(x01, "tex.x1.wrapped"); + x10 = x00; + x11 = x01; - if (dims == 3) { - lp_build_sample_wrap_linear(bld, coords[2], depth_vec, - flt_depth_vec, offsets[2], - 
bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z0, &z1, &r_fpart); - lp_build_name(z0, "tex.z0.wrapped"); - lp_build_name(z1, "tex.z1.wrapped"); + if (dims >= 2) { + lp_build_sample_wrap_linear(bld, coords[1], height_vec, + flt_height_vec, offsets[1], + bld->static_texture_state->pot_height, + bld->static_sampler_state->wrap_t, + &y00, &y10, &t_fpart); + lp_build_name(y00, "tex.y0.wrapped"); + lp_build_name(y10, "tex.y1.wrapped"); + y01 = y00; + y11 = y10; + + if (dims == 3) { + lp_build_sample_wrap_linear(bld, coords[2], depth_vec, + flt_depth_vec, offsets[2], + bld->static_texture_state->pot_depth, + bld->static_sampler_state->wrap_r, + &z00, &z1, &r_fpart); + z01 = z10 = z11 = z00; + lp_build_name(z00, "tex.z0.wrapped"); + lp_build_name(z1, "tex.z1.wrapped"); + } + } + if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || + bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY || + bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) { + z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ + lp_build_name(z00, "tex.z0.layer"); + lp_build_name(z1, "tex.z1.layer"); } } - if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || - bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY || - bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) { - z0 = z1 = coords[2]; /* cube face or layer */ - lp_build_name(z0, "tex.z0.layer"); - lp_build_name(z1, "tex.z1.layer"); + else { + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_build_context *ivec_bld = &bld->int_coord_bld; + struct lp_build_context *coord_bld = &bld->coord_bld; + struct lp_build_if_state edge_if; + LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; + LLVMValueRef fall_off[4], coord, have_edge; + LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp; + LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp; + LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped; + LLVMValueRef face = coords[2]; + LLVMValueRef 
half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f); + LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one); + /* XXX drop height calcs. Could (should) do this without seamless filtering too */ + height_vec = width_vec; + flt_height_vec = flt_width_vec; + + /* XXX the overflow logic is actually sort of duplicated with trilinear, + * since an overflow in one mip should also have a corresponding overflow + * in another. + */ + /* should always have normalized coords, and offsets are undefined */ + assert(bld->static_sampler_state->normalized_coords); + coord = lp_build_mul(coord_bld, coords[0], flt_width_vec); + /* instead of clamp, build mask if overflowed */ + coord = lp_build_sub(coord_bld, coord, half); + /* convert to int, compute lerp weight */ + /* not ideal with AVX (and no AVX2) */ + lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart); + x1 = lp_build_add(ivec_bld, x0, ivec_bld->one); + coord = lp_build_mul(coord_bld, coords[1], flt_height_vec); + coord = lp_build_sub(coord_bld, coord, half); + lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart); + y1 = lp_build_add(ivec_bld, y0, ivec_bld->one); + + fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero); + fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one); + fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero); + fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one); + + have_edge = lp_build_or(ivec_bld, fall_off[0], fall_off[1]); + have_edge = lp_build_or(ivec_bld, have_edge, fall_off[2]); + have_edge = lp_build_or(ivec_bld, have_edge, fall_off[3]); + + have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge); + + for (texel_index = 0; texel_index < 4; texel_index++) { + xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); + ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); + zs[texel_index] = 
lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); + } + + lp_build_if(&edge_if, bld->gallivm, have_edge); + + /* + * Need to feed clamped values here for cheap corner handling, + * but only for y coord (as when falling off both edges we only + * fall off the x one) - this should be sufficient. + */ + y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero); + y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one); + + /* + * Get all possible new coords. + */ + lp_build_cube_new_coords(ivec_bld, face, + x0, x1, y0_clamped, y1_clamped, + length_minus_one, + new_faces, new_xcoords, new_ycoords); + + /* handle fall off x-, x+ direction */ + /* determine new coords, face (not both fall_off vars can be true at same time) */ + x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0); + y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped); + x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0); + y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped); + x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1); + y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped); + x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1); + y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped); + + z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face); + z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face); + + /* handle fall off y-, y+ direction */ + /* + * Cheap corner logic: just hack up things so a texel doesn't fall + * off both sides (which means filter weights will be wrong but we'll only + * use valid texels in the filter). + * This means however (y) coords must additionally be clamped (see above). + * This corner handling should be fully OpenGL (but not d3d10) compliant. 
+ */ + fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]); + fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]); + fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]); + fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]); + + x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00); + y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00); + x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01); + y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01); + x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10); + y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10); + x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11); + y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11); + + z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00); + z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01); + z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10); + z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11); + + LLVMBuildStore(builder, x00, xs[0]); + LLVMBuildStore(builder, x01, xs[1]); + LLVMBuildStore(builder, x10, xs[2]); + LLVMBuildStore(builder, x11, xs[3]); + LLVMBuildStore(builder, y00, ys[0]); + LLVMBuildStore(builder, y01, ys[1]); + LLVMBuildStore(builder, y10, ys[2]); + LLVMBuildStore(builder, y11, ys[3]); + LLVMBuildStore(builder, z00, zs[0]); + LLVMBuildStore(builder, z01, zs[1]); + LLVMBuildStore(builder, z10, zs[2]); + LLVMBuildStore(builder, z11, zs[3]); + + lp_build_else(&edge_if); + + LLVMBuildStore(builder, x0, xs[0]); + LLVMBuildStore(builder, x1, xs[1]); + LLVMBuildStore(builder, x0, xs[2]); + LLVMBuildStore(builder, x1, xs[3]); + LLVMBuildStore(builder, y0, ys[0]); + LLVMBuildStore(builder, y0, ys[1]); + LLVMBuildStore(builder, y1, ys[2]); + 
LLVMBuildStore(builder, y1, ys[3]); + LLVMBuildStore(builder, face, zs[0]); + LLVMBuildStore(builder, face, zs[1]); + LLVMBuildStore(builder, face, zs[2]); + LLVMBuildStore(builder, face, zs[3]); + + lp_build_endif(&edge_if); + + x00 = LLVMBuildLoad(builder, xs[0], ""); + x01 = LLVMBuildLoad(builder, xs[1], ""); + x10 = LLVMBuildLoad(builder, xs[2], ""); + x11 = LLVMBuildLoad(builder, xs[3], ""); + y00 = LLVMBuildLoad(builder, ys[0], ""); + y01 = LLVMBuildLoad(builder, ys[1], ""); + y10 = LLVMBuildLoad(builder, ys[2], ""); + y11 = LLVMBuildLoad(builder, ys[3], ""); + z00 = LLVMBuildLoad(builder, zs[0], ""); + z01 = LLVMBuildLoad(builder, zs[1], ""); + z10 = LLVMBuildLoad(builder, zs[2], ""); + z11 = LLVMBuildLoad(builder, zs[3], ""); } if (linear_mask) { @@ -937,12 +1104,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* get x0/x1 texels */ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x0, y0, z0, + x00, y00, z00, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors[0][0]); lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x1, y0, z0, + x01, y01, z01, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors[0][1]); @@ -973,12 +1140,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* get x0/x1 texels at y1 */ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x0, y1, z0, + x10, y10, z10, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors[1][0]); lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x1, y1, z0, + x11, y11, z11, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors[1][1]); @@ -1012,22 +1179,22 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* get x0/x1/y0/y1 texels at z1 */ lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x0, y0, z1, + x00, y00, z1, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors1[0][0]); lp_build_sample_texel_soa(bld, 
width_vec, height_vec, depth_vec, - x1, y0, z1, + x01, y01, z1, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors1[0][1]); lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x0, y1, z1, + x10, y10, z1, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors1[1][0]); lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, - x1, y1, z1, + x11, y11, z1, row_stride_vec, img_stride_vec, data_ptr, mipoffsets, neighbors1[1][1]); @@ -2306,15 +2473,25 @@ lp_build_sample_soa(struct gallivm_state *gallivm, use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r); } } + if (static_texture_state->target == PIPE_TEXTURE_CUBE && + derived_sampler_state.seamless_cube_map && + (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR || + derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { + /* theoretically possible with AoS filtering but not implemented (complex!) */ + use_aos = 0; + } if ((gallivm_debug & GALLIVM_DEBUG_PERF) && !use_aos && util_format_fits_8unorm(bld.format_desc)) { debug_printf("%s: using floating point linear filtering for %s\n", __FUNCTION__, bld.format_desc->short_name); - debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d wrapr %d\n", + debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d" + " wraps %d wrapt %d wrapr %d\n", derived_sampler_state.min_img_filter, derived_sampler_state.mag_img_filter, derived_sampler_state.min_mip_filter, + static_texture_state->target, + derived_sampler_state.seamless_cube_map, derived_sampler_state.wrap_s, derived_sampler_state.wrap_t, derived_sampler_state.wrap_r); |