diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 304 |
1 files changed, 201 insertions, 103 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 6b1509c7cfa..ff8cbf604c0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1030,20 +1030,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef neighbors[2][2][4]; int chan, texel_index; boolean seamless_cube_filter, accurate_cube_corners; + unsigned chan_swiz = bld->static_texture_state->swizzle_r; seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && bld->static_sampler_state->seamless_cube_map; - /* - * XXX I don't know how this is really supposed to work with gather. From GL - * spec wording (not gather specific) it sounds like the 4th missing texel - * should be an average of the other 3, hence for gather could return this. - * This is however NOT how the code here works, which just fixes up the - * weights used for filtering instead. And of course for gather there is - * no filter to tweak... - */ - accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter && - !is_gather; + + accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter; lp_build_extract_image_sizes(bld, &bld->int_size_bld, @@ -1382,94 +1375,191 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * as well) here. */ if (accurate_cube_corners) { - LLVMValueRef w00, w01, w10, w11, wx0, wy0; - LLVMValueRef c_weight, c00, c01, c10, c11; - LLVMValueRef have_corner, one_third, tmp; + LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f; + LLVMValueRef have_corner, one_third; - colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); - colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); - colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); - colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); + colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0"); + colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1"); + colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2"); + colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3"); have_corner = LLVMBuildLoad(builder, have_corners, ""); lp_build_if(&corner_if, bld->gallivm, have_corner); - /* - * we can't use standard 2d lerp as we need per-element weight - * in case of corners, so just calculate bilinear result as - * w00*s00 + w01*s01 + w10*s10 + w11*s11. - * (This is actually less work than using 2d lerp, 7 vs. 9 instructions, - * however calculating the weights needs another 6, so actually probably - * not slower than 2d lerp only for 4 channels as weights only need - * to be calculated once - of course fixing the weights has additional cost.) - */ - wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart); - wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart); - w00 = lp_build_mul(coord_bld, wx0, wy0); - w01 = lp_build_mul(coord_bld, s_fpart, wy0); - w10 = lp_build_mul(coord_bld, wx0, t_fpart); - w11 = lp_build_mul(coord_bld, s_fpart, t_fpart); - - /* find corner weight */ + one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, + 1.0f/3.0f); + + /* find corner */ c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]); - c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero); + c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, ""); c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]); - c_weight = lp_build_select(coord_bld, c01, w01, c_weight); + c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, ""); c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]); - c_weight = lp_build_select(coord_bld, c10, w10, c_weight); + c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, ""); c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]); - c_weight = lp_build_select(coord_bld, c11, w11, c_weight); + c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, ""); - /* - * add 1/3 of the corner weight to each of the 3 other samples - * and null out corner weight - */ - one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f); - c_weight = lp_build_mul(coord_bld, c_weight, one_third); - w00 = lp_build_add(coord_bld, w00, c_weight); - c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, ""); - w00 = lp_build_andnot(coord_bld, w00, c00); - w01 = lp_build_add(coord_bld, w01, c_weight); - c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, ""); - w01 = lp_build_andnot(coord_bld, w01, c01); - w10 = lp_build_add(coord_bld, w10, c_weight); - c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, ""); - w10 = lp_build_andnot(coord_bld, w10, c10); - w11 = lp_build_add(coord_bld, w11, c_weight); - c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, ""); - w11 = lp_build_andnot(coord_bld, w11, c11); + if (!is_gather) { + /* + * we can't use standard 2d lerp as we need per-element weight + * in case of corners, so just calculate bilinear result as + * w00*s00 + w01*s01 + w10*s10 + w11*s11. + * (This is actually less work than using 2d lerp, 7 vs. 9 + * instructions, however calculating the weights needs another 6, + * so actually probably not slower than 2d lerp only for 4 channels + * as weights only need to be calculated once - of course fixing + * the weights has additional cost.) + */ + LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp; + wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart); + wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart); + w00 = lp_build_mul(coord_bld, wx0, wy0); + w01 = lp_build_mul(coord_bld, s_fpart, wy0); + w10 = lp_build_mul(coord_bld, wx0, t_fpart); + w11 = lp_build_mul(coord_bld, s_fpart, t_fpart); + + /* find corner weight */ + c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero); + c_weight = lp_build_select(coord_bld, c01, w01, c_weight); + c_weight = lp_build_select(coord_bld, c10, w10, c_weight); + c_weight = lp_build_select(coord_bld, c11, w11, c_weight); - if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { - for (chan = 0; chan < 4; chan++) { - colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]); - tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); - colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); - tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]); - colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); - tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); - colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); + /* + * add 1/3 of the corner weight to the weight of the 3 other + * samples and null out corner weight. + */ + c_weight = lp_build_mul(coord_bld, c_weight, one_third); + w00 = lp_build_add(coord_bld, w00, c_weight); + w00 = lp_build_andnot(coord_bld, w00, c00f); + w01 = lp_build_add(coord_bld, w01, c_weight); + w01 = lp_build_andnot(coord_bld, w01, c01f); + w10 = lp_build_add(coord_bld, w10, c_weight); + w10 = lp_build_andnot(coord_bld, w10, c10f); + w11 = lp_build_add(coord_bld, w11, c_weight); + w11 = lp_build_andnot(coord_bld, w11, c11f); + + if (bld->static_sampler_state->compare_mode == + PIPE_TEX_COMPARE_NONE) { + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_mul(coord_bld, w00, + neighbors[0][0][chan]); + tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); + colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); + tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]); + colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); + tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); + colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); + } + } + else { + LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; + cmpval00 = lp_build_sample_comparefunc(bld, coords[4], + neighbors[0][0][0]); + cmpval01 = lp_build_sample_comparefunc(bld, coords[4], + neighbors[0][1][0]); + cmpval10 = lp_build_sample_comparefunc(bld, coords[4], + neighbors[1][0][0]); + cmpval11 = lp_build_sample_comparefunc(bld, coords[4], + neighbors[1][1][0]); + /* + * inputs to interpolation are just masks so just add + * masked weights together + */ + cmpval00 = LLVMBuildBitCast(builder, cmpval00, + coord_bld->vec_type, ""); + cmpval01 = LLVMBuildBitCast(builder, cmpval01, + coord_bld->vec_type, ""); + cmpval10 = LLVMBuildBitCast(builder, cmpval10, + coord_bld->vec_type, ""); + cmpval11 = LLVMBuildBitCast(builder, cmpval11, + coord_bld->vec_type, ""); + colors0[0] = lp_build_and(coord_bld, w00, cmpval00); + tmp = lp_build_and(coord_bld, w01, cmpval01); + colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); + tmp = lp_build_and(coord_bld, w10, cmpval10); + colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); + tmp = lp_build_and(coord_bld, w11, cmpval11); + colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); + colors0[1] = colors0[2] = colors0[3] = colors0[0]; } } else { - LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; - cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); - cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); - cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); - cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); - /* inputs to interpolation are just masks so just add masked weights together */ - cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, ""); - cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, ""); - cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, ""); - cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, ""); - colors0[0] = lp_build_and(coord_bld, w00, cmpval00); - tmp = lp_build_and(coord_bld, w01, cmpval01); - colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); - tmp = lp_build_and(coord_bld, w10, cmpval10); - colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); - tmp = lp_build_and(coord_bld, w11, cmpval11); - colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); - colors0[1] = colors0[2] = colors0[3] = colors0[0]; + /* + * We don't have any weights to adjust, so instead calculate + * the fourth texel as simply the average of the other 3. + * (This would work for non-gather too, however we'd have + * a boatload more of the select stuff due to there being + * 4 times as many colors as weights.) + */ + LLVMValueRef col00, col01, col10, col11; + LLVMValueRef colc, colc0, colc1; + col10 = lp_build_swizzle_soa_channel(texel_bld, + neighbors[1][0], chan_swiz); + col11 = lp_build_swizzle_soa_channel(texel_bld, + neighbors[1][1], chan_swiz); + col01 = lp_build_swizzle_soa_channel(texel_bld, + neighbors[0][1], chan_swiz); + col00 = lp_build_swizzle_soa_channel(texel_bld, + neighbors[0][0], chan_swiz); + + /* + * The spec says for comparison filtering, the comparison + * must happen before synthesizing the new value. + * This means all gathered values are always 0 or 1, + * except for the non-existing texel, which can be 0,1/3,2/3,1... + * Seems like we'd be allowed to just return 0 or 1 too, so we + * could simplify and pass down the compare mask values to the + * end (using int arithmetic/compare on the mask values to + * construct the fourth texel) and only there convert to floats + * but it's probably not worth it (it might be easier for the cpu + * but not for the code)... + */ + if (bld->static_sampler_state->compare_mode != + PIPE_TEX_COMPARE_NONE) { + LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; + cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00); + cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01); + cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10); + cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11); + col00 = lp_build_select(texel_bld, cmpval00, + texel_bld->one, texel_bld->zero); + col01 = lp_build_select(texel_bld, cmpval01, + texel_bld->one, texel_bld->zero); + col10 = lp_build_select(texel_bld, cmpval10, + texel_bld->one, texel_bld->zero); + col11 = lp_build_select(texel_bld, cmpval11, + texel_bld->one, texel_bld->zero); + } + + /* + * Null out corner color. + */ + col00 = lp_build_andnot(coord_bld, col00, c00f); + col01 = lp_build_andnot(coord_bld, col01, c01f); + col10 = lp_build_andnot(coord_bld, col10, c10f); + col11 = lp_build_andnot(coord_bld, col11, c11f); + + /* + * New corner texel color is all colors added / 3. + */ + colc0 = lp_build_add(coord_bld, col00, col01); + colc1 = lp_build_add(coord_bld, col10, col11); + colc = lp_build_add(coord_bld, colc0, colc1); + colc = lp_build_mul(coord_bld, one_third, colc); + + /* + * Replace the corner texel color with the new value. + */ + col00 = lp_build_select(coord_bld, c00, colc, col00); + col01 = lp_build_select(coord_bld, c01, colc, col01); + col10 = lp_build_select(coord_bld, c10, colc, col10); + col11 = lp_build_select(coord_bld, c11, colc, col11); + + colors0[0] = col10; + colors0[1] = col11; + colors0[2] = col01; + colors0[3] = col00; } LLVMBuildStore(builder, colors0[0], colorss[0]); @@ -1488,7 +1578,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * end of sampling (much less values to swizzle), but this * obviously cannot work when using gather. */ - unsigned chan_swiz = bld->static_texture_state->swizzle_r; colors0[0] = lp_build_swizzle_soa_channel(texel_bld, neighbors[1][0], chan_swiz); @@ -1524,25 +1613,14 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, if (is_gather) { /* more hacks for swizzling, should be X, ONE or ZERO... */ - unsigned chan_swiz = bld->static_texture_state->swizzle_r; - if (chan_swiz <= PIPE_SWIZZLE_W) { - colors0[0] = lp_build_select(texel_bld, cmpval10, - texel_bld->one, texel_bld->zero); - colors0[1] = lp_build_select(texel_bld, cmpval11, - texel_bld->one, texel_bld->zero); - colors0[2] = lp_build_select(texel_bld, cmpval01, - texel_bld->one, texel_bld->zero); - colors0[3] = lp_build_select(texel_bld, cmpval00, - texel_bld->one, texel_bld->zero); - } - else if (chan_swiz == PIPE_SWIZZLE_0) { - colors0[0] = colors0[1] = colors0[2] = colors0[3] = - texel_bld->zero; - } - else { - colors0[0] = colors0[1] = colors0[2] = colors0[3] = - texel_bld->one; - } + colors0[0] = lp_build_select(texel_bld, cmpval10, + texel_bld->one, texel_bld->zero); + colors0[1] = lp_build_select(texel_bld, cmpval11, + texel_bld->one, texel_bld->zero); + colors0[2] = lp_build_select(texel_bld, cmpval01, + texel_bld->one, texel_bld->zero); + colors0[3] = lp_build_select(texel_bld, cmpval00, + texel_bld->one, texel_bld->zero); } else { colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, @@ -1635,6 +1713,26 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } } } + if (is_gather) { + /* + * For gather, we can't do our usual channel swizzling done later, + * so do it here. It only really matters for 0/1 swizzles in case + * of comparison filtering, since in this case the results would be + * wrong, without comparison it should all work out alright but it + * can't hurt to do that here, since it will instantly drop all + * calculations above, though it's a rather stupid idea to do + * gather on a channel which will always return 0 or 1 in any case... + */ + if (chan_swiz == PIPE_SWIZZLE_1) { + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = texel_bld->one; + } + } else if (chan_swiz == PIPE_SWIZZLE_0) { + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = texel_bld->zero; + } + } + } } |