summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c72
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c331
3 files changed, 212 insertions, 193 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index ea7dd95b78b..37490e47c85 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -265,7 +265,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
first_level = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, unit);
- first_level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, first_level);
+ first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
float_size = lp_build_int_to_float(float_size_bld, int_size);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 236b68bb0ce..1f3a98a6896 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -516,6 +516,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
@@ -625,6 +626,9 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
}
+ if (mipoffsets) {
+ offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
+ }
lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
x_subcoord, y_subcoord,
@@ -645,6 +649,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
@@ -711,6 +716,9 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
row_stride_vec, img_stride_vec,
&offset,
&x_subcoord, &y_subcoord);
+ if (mipoffsets) {
+ offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
+ }
lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
x_subcoord, y_subcoord,
@@ -966,6 +974,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
@@ -1073,6 +1082,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
bld->static_state->wrap_s,
&x_offset0, &x_offset1,
&x_subcoord[0], &x_subcoord[1]);
+ if (mipoffsets) {
+ x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
+ x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
+ }
+
for (z = 0; z < 2; z++) {
for (y = 0; y < 2; y++) {
offset[z][y][0] = x_offset0;
@@ -1149,6 +1163,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
@@ -1238,6 +1253,11 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
bld->format_desc->block.width,
x_icoord1, x_stride,
&x_offset1, &x_subcoord[1]);
+ if (mipoffsets) {
+ x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
+ x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
+ }
+
for (z = 0; z < 2; z++) {
for (y = 0; y < 2; y++) {
offset[z][y][0] = x_offset0;
@@ -1330,6 +1350,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef img_stride1_vec = NULL;
LLVMValueRef data_ptr0;
LLVMValueRef data_ptr1;
+ LLVMValueRef mipoff0 = NULL;
+ LLVMValueRef mipoff1 = NULL;
LLVMValueRef colors0_lo, colors0_hi;
LLVMValueRef colors1_lo, colors1_hi;
@@ -1337,13 +1359,21 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel0,
&size0,
&row_stride0_vec, &img_stride0_vec);
- data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+ if (bld->num_lods == 1) {
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+ }
+ else {
+ /* This path should work for num_lods 1 too but slightly less efficient */
+ data_ptr0 = bld->base_ptr;
+ mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
+ }
+
if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest_afloat(bld,
size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r,
+ data_ptr0, mipoff0, s, t, r,
&colors0_lo, &colors0_hi);
}
else {
@@ -1351,7 +1381,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_sample_image_linear_afloat(bld,
size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r,
+ data_ptr0, mipoff0, s, t, r,
&colors0_lo, &colors0_hi);
}
}
@@ -1360,7 +1390,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_sample_image_nearest(bld,
size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r,
+ data_ptr0, mipoff0, s, t, r,
&colors0_lo, &colors0_hi);
}
else {
@@ -1368,7 +1398,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_sample_image_linear(bld,
size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r,
+ data_ptr0, mipoff0, s, t, r,
&colors0_lo, &colors0_hi);
}
}
@@ -1422,21 +1452,30 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel1,
&size1,
&row_stride1_vec, &img_stride1_vec);
- data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &size1,
+ &row_stride1_vec, &img_stride1_vec);
+ if (bld->num_lods == 1) {
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ }
+ else {
+ data_ptr1 = bld->base_ptr;
+ mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
+ }
if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest_afloat(bld,
size1,
row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
+ data_ptr1, mipoff1, s, t, r,
&colors1_lo, &colors1_hi);
}
else {
lp_build_sample_image_linear_afloat(bld,
size1,
row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
+ data_ptr1, mipoff1, s, t, r,
&colors1_lo, &colors1_hi);
}
}
@@ -1445,14 +1484,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_sample_image_nearest(bld,
size1,
row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
+ data_ptr1, mipoff1, s, t, r,
&colors1_lo, &colors1_hi);
}
else {
lp_build_sample_image_linear(bld,
size1,
row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
+ data_ptr1, mipoff1, s, t, r,
&colors1_lo, &colors1_hi);
}
}
@@ -1580,6 +1619,19 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
+ /*
+ * XXX this should to all lods into account, if some are min
+ * some max probably could hack up the coords/weights in the linear
+ * path with selects to work for nearest.
+ * If that's just two quads sitting next to each other it seems
+ * quite ok to do the same filtering method on both though, at
+ * least unless we have explicit lod (and who uses different
+ * min/mag filter with that?)
+ */
+ if (bld->num_lods > 1)
+ lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
+ lp_build_const_int32(bld->gallivm, 0), "");
+
/* minify = lod >= 0.0 */
minify = LLVMBuildICmp(builder, LLVMIntSGE,
lod_ipart, int_bld->zero, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index daa49506ca5..97a23df4a87 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -82,6 +82,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef y_stride,
LLVMValueRef z_stride,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef texel_out[4])
{
const struct lp_sampler_static_state *static_state = bld->static_state;
@@ -139,6 +140,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
bld->format_desc,
x, y, z, y_stride, z_stride,
&offset, &i, &j);
+ if (mipoffsets) {
+ offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
+ }
if (use_border) {
/* If we can sample the border color, it means that texcoords may
@@ -594,6 +598,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
@@ -661,7 +666,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x, y, z,
row_stride_vec, img_stride_vec,
- data_ptr, colors_out);
+ data_ptr, mipoffsets, colors_out);
}
@@ -676,6 +681,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
+ LLVMValueRef mipoffsets,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
@@ -756,12 +762,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x0, y0, z0,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors[0][0]);
+ data_ptr, mipoffsets, neighbors[0][0]);
lp_build_sample_texel_soa(bld, unit,
width_vec, height_vec, depth_vec,
x1, y0, z0,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors[0][1]);
+ data_ptr, mipoffsets, neighbors[0][1]);
if (dims == 1) {
/* Interpolate two samples from 1D image to produce one color */
@@ -780,12 +786,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x0, y1, z0,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors[1][0]);
+ data_ptr, mipoffsets, neighbors[1][0]);
lp_build_sample_texel_soa(bld, unit,
width_vec, height_vec, depth_vec,
x1, y1, z0,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors[1][1]);
+ data_ptr, mipoffsets, neighbors[1][1]);
/* Bilinear interpolate the four samples from the 2D image / 3D slice */
for (chan = 0; chan < 4; chan++) {
@@ -806,22 +812,22 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
width_vec, height_vec, depth_vec,
x0, y0, z1,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors1[0][0]);
+ data_ptr, mipoffsets, neighbors1[0][0]);
lp_build_sample_texel_soa(bld, unit,
width_vec, height_vec, depth_vec,
x1, y0, z1,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors1[0][1]);
+ data_ptr, mipoffsets, neighbors1[0][1]);
lp_build_sample_texel_soa(bld, unit,
width_vec, height_vec, depth_vec,
x0, y1, z1,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors1[1][0]);
+ data_ptr, mipoffsets, neighbors1[1][0]);
lp_build_sample_texel_soa(bld, unit,
width_vec, height_vec, depth_vec,
x1, y1, z1,
row_stride_vec, img_stride_vec,
- data_ptr, neighbors1[1][1]);
+ data_ptr, mipoffsets, neighbors1[1][1]);
/* Bilinear interpolate the four samples from the second Z slice */
for (chan = 0; chan < 4; chan++) {
@@ -878,6 +884,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef img_stride1_vec = NULL;
LLVMValueRef data_ptr0 = NULL;
LLVMValueRef data_ptr1 = NULL;
+ LLVMValueRef mipoff0 = NULL;
+ LLVMValueRef mipoff1 = NULL;
LLVMValueRef colors0[4], colors1[4];
unsigned chan;
@@ -885,12 +893,19 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel0,
&size0,
&row_stride0_vec, &img_stride0_vec);
- data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+ if (bld->num_lods == 1) {
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
+ }
+ else {
+ /* This path should work for num_lods 1 too but slightly less efficient */
+ data_ptr0 = bld->base_ptr;
+ mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
+ }
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest(bld, unit,
size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r,
+ data_ptr0, mipoff0, s, t, r,
colors0);
}
else {
@@ -898,7 +913,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_sample_image_linear(bld, unit,
size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r,
+ data_ptr0, mipoff0, s, t, r,
colors0);
}
@@ -943,19 +958,25 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel1,
&size1,
&row_stride1_vec, &img_stride1_vec);
- data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (bld->num_lods == 1) {
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ }
+ else {
+ data_ptr1 = bld->base_ptr;
+ mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
+ }
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
lp_build_sample_image_nearest(bld, unit,
size1,
row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
+ data_ptr1, mipoff1, s, t, r,
colors1);
}
else {
lp_build_sample_image_linear(bld, unit,
size1,
row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
+ data_ptr1, mipoff1, s, t, r,
colors1);
}
@@ -1125,6 +1146,16 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
+ /*
+ * XXX this should to all lods into account, if some are min
+ * some max probably could hack up the coords/weights in the linear
+ * path with selects to work for nearest.
+ * If that's just two quads sitting next to each other it seems
+ * quite ok to do the same filtering method on both though, at
+ * least unless we have explicit lod (and who uses different
+ * min/mag filter with that?)
+ */
+
/* minify = lod >= 0.0 */
minify = LLVMBuildICmp(builder, LLVMIntSGE,
lod_ipart, int_bld->zero, "");
@@ -1184,16 +1215,6 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
/* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */
if (explicit_lod && bld->static_state->target != PIPE_BUFFER) {
- /* could also avoid this if there are no mipmaps */
- /* XXX temporary hack until ordinary sampling handles per-quad lod the same */
- bld->num_lods = bld->coord_type.length / 4;
- bld->float_size_type = bld->float_size_in_type;
- bld->float_size_type.length = bld->num_lods > 1 ? bld->coord_type.length :
- bld->float_size_in_type.length;
- bld->int_size_type = lp_int_type(bld->float_size_type);
- lp_build_context_init(&bld->int_size_bld, bld->gallivm, bld->int_size_type);
- lp_build_context_init(&bld->float_size_bld, bld->gallivm, bld->float_size_type);
-
ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
perquadi_bld->type, explicit_lod, 0);
lp_build_nearest_mip_level(bld, unit, ilevel, &ilevel);
@@ -1275,6 +1296,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
* Do shadow test/comparison.
* \param p the texcoord Z (aka R, aka P) component
* \param texel the texel to compare against (use the X channel)
+ * Ideally this should really be done per-sample.
*/
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
@@ -1358,6 +1380,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
LLVMValueRef texel_out[4])
{
unsigned dims = texture_dims(static_state->target);
+ unsigned num_quads = type.length / 4;
+ unsigned mip_filter = static_state->min_mip_filter;
struct lp_build_sample_context bld;
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMBuilderRef builder = gallivm->builder;
@@ -1396,7 +1420,18 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
bld.perquadi_type = lp_int_type(bld.perquadf_type);
- bld.num_lods = 1;
+ /*
+ * There are other situations where at least the multiple int lods could be
+ * avoided like min and max lod being equal.
+ */
+ if ((is_fetch && explicit_lod && bld.static_state->target != PIPE_BUFFER) ||
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld.num_lods = num_quads;
+ }
+ else {
+ bld.num_lods = 1;
+ }
+
bld.float_size_type = bld.float_size_in_type;
bld.float_size_type.length = bld.num_lods > 1 ? type.length :
bld.float_size_in_type.length;
@@ -1456,8 +1491,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
else {
LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
- unsigned num_quads = type.length / 4;
- const unsigned mip_filter = bld.static_state->min_mip_filter;
boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
lp_is_simple_wrap_mode(static_state->wrap_s) &&
lp_is_simple_wrap_mode(static_state->wrap_t);
@@ -1494,16 +1527,21 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
/*
* we only try 8-wide sampling with soa as it appears to
- * be a loss with aos with AVX.
+ * be a loss with aos with AVX (but it should work).
+ * (It should be faster if we'd support avx2)
*/
- if (num_quads == 1 || (mip_filter == PIPE_TEX_MIPFILTER_NONE &&
- !use_aos)) {
+ if (num_quads == 1 || !use_aos) {
if (num_quads > 1) {
- LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
- /* These parameters are the same for all quads */
- lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
- ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
+ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
+ /*
+ * These parameters are the same for all quads,
+ * could probably simplify.
+ */
+ lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
+ ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
+ }
}
if (use_aos) {
/* do sampling/filtering with fixed pt arithmetic */
@@ -1523,170 +1561,99 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
}
}
else {
- struct lp_build_if_state if_ctx;
- LLVMValueRef notsame_levels, notsame;
- LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
- LLVMValueRef texels[4];
- LLVMValueRef texelout[4];
unsigned j;
-
- texels[0] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texr");
- texels[1] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texg");
- texels[2] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texb");
- texels[3] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texa");
-
- /* only build the if if we MAY split, otherwise always split */
- if (!use_aos) {
- notsame = lp_build_extract_broadcast(gallivm,
- bld.perquadi_bld.type,
- bld.perquadi_bld.type,
- ilevel0, index0);
- notsame = lp_build_sub(&bld.perquadi_bld, ilevel0, notsame);
- notsame_levels = lp_build_any_true_range(&bld.perquadi_bld, num_quads,
- notsame);
+ struct lp_build_sample_context bld4;
+ struct lp_type type4 = type;
+ unsigned i;
+ LLVMValueRef texelout4[4];
+ LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
+
+ type4.length = 4;
+
+ /* Setup our build context */
+ memset(&bld4, 0, sizeof bld4);
+ bld4.gallivm = bld.gallivm;
+ bld4.static_state = bld.static_state;
+ bld4.dynamic_state = bld.dynamic_state;
+ bld4.format_desc = bld.format_desc;
+ bld4.dims = bld.dims;
+ bld4.row_stride_array = bld.row_stride_array;
+ bld4.img_stride_array = bld.img_stride_array;
+ bld4.base_ptr = bld.base_ptr;
+ bld4.mip_offsets = bld.mip_offsets;
+ bld4.int_size = bld.int_size;
+
+ bld4.vector_width = lp_type_width(type4);
+
+ bld4.float_type = lp_type_float(32);
+ bld4.int_type = lp_type_int(32);
+ bld4.coord_type = type4;
+ bld4.int_coord_type = lp_int_type(type4);
+ bld4.float_size_in_type = lp_type_float(32);
+ bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
+ bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
+ bld4.texel_type = type4;
+ bld4.perquadf_type = type4;
+ /* we want native vector size to be able to use our intrinsics */
+ bld4.perquadf_type.length = 1;
+ bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
+
+ bld4.num_lods = 1;
+ bld4.int_size_type = bld4.int_size_in_type;
+ bld4.float_size_type = bld4.float_size_in_type;
+
+ lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
+ lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
+ lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
+ lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
+ lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
+ lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
+ lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
+ lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
+ lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
+ lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
+ lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
+ lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);
+
+ for (i = 0; i < num_quads; i++) {
+ LLVMValueRef s4, t4, r4;
+ LLVMValueRef lod_iparts, lod_fparts = NULL;
+ LLVMValueRef ilevel0s, ilevel1s = NULL;
+ LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+
+ s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
+ t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
+ r4 = lp_build_extract_range(gallivm, r, 4*i, 4);
+ lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
+ ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- notsame = lp_build_extract_broadcast(gallivm,
- bld.perquadi_bld.type,
- bld.perquadi_bld.type,
- ilevel1, index0);
- notsame = lp_build_sub(&bld.perquadi_bld, ilevel1, notsame);
- notsame = lp_build_any_true_range(&bld.perquadi_bld, num_quads, notsame);
- notsame_levels = LLVMBuildOr(builder, notsame_levels, notsame, "");
- }
- lp_build_if(&if_ctx, gallivm, notsame_levels);
- }
-
- {
- struct lp_build_sample_context bld4;
- struct lp_type type4 = type;
- unsigned i;
- LLVMValueRef texelout4[4];
- LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
-
- type4.length = 4;
-
- /* Setup our build context */
- memset(&bld4, 0, sizeof bld4);
- bld4.gallivm = bld.gallivm;
- bld4.static_state = bld.static_state;
- bld4.dynamic_state = bld.dynamic_state;
- bld4.format_desc = bld.format_desc;
- bld4.dims = bld.dims;
- bld4.row_stride_array = bld.row_stride_array;
- bld4.img_stride_array = bld.img_stride_array;
- bld4.base_ptr = bld.base_ptr;
- bld4.mip_offsets = bld.mip_offsets;
- bld4.int_size = bld.int_size;
-
- bld4.vector_width = lp_type_width(type4);
-
- bld4.float_type = lp_type_float(32);
- bld4.int_type = lp_type_int(32);
- bld4.coord_type = type4;
- bld4.int_coord_type = lp_int_type(type4);
- bld4.float_size_in_type = lp_type_float(32);
- bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
- bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
- bld4.float_size_type = bld4.float_size_in_type;
- bld4.int_size_type = bld4.int_size_in_type;
- bld4.texel_type = type4;
- bld4.perquadf_type = type4;
- /* we want native vector size to be able to use our intrinsics */
- bld4.perquadf_type.length = 1;
- bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
- bld4.num_lods = 1;
-
- lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
- lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
- lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
- lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
- lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
- lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
- lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
- lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
- lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
- lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
- lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
- lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);
-
- for (i = 0; i < num_quads; i++) {
- LLVMValueRef s4, t4, r4;
- LLVMValueRef lod_iparts, lod_fparts = NULL;
- LLVMValueRef ilevel0s, ilevel1s = NULL;
- LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-
- s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
- t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
- r4 = lp_build_extract_range(gallivm, r, 4*i, 4);
- lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
- ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
- lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
- }
-
- if (use_aos) {
- /* do sampling/filtering with fixed pt arithmetic */
- lp_build_sample_aos(&bld4, unit,
- s4, t4, r4,
- lod_iparts, lod_fparts,
- ilevel0s, ilevel1s,
- texelout4);
- }
-
- else {
- lp_build_sample_general(&bld4, unit,
- s4, t4, r4,
- lod_iparts, lod_fparts,
- ilevel0s, ilevel1s,
- texelout4);
- }
- for (j = 0; j < 4; j++) {
- texelouttmp[j][i] = texelout4[j];
- }
- }
- for (j = 0; j < 4; j++) {
- texelout[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
- LLVMBuildStore(builder, texelout[j], texels[j]);
- }
- }
- if (!use_aos) {
- LLVMValueRef ilevel0s, lod_iparts, ilevel1s = NULL;
-
- lp_build_else(&if_ctx);
-
- /* These parameters are the same for all quads */
- lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
- ilevel0s = LLVMBuildExtractElement(builder, ilevel0, index0, "");
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- ilevel1s = LLVMBuildExtractElement(builder, ilevel1, index0, "");
+ ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
+ lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
}
if (use_aos) {
/* do sampling/filtering with fixed pt arithmetic */
- lp_build_sample_aos(&bld, unit,
- s, t, r,
- lod_iparts, lod_fpart,
+ lp_build_sample_aos(&bld4, unit,
+ s4, t4, r4,
+ lod_iparts, lod_fparts,
ilevel0s, ilevel1s,
- texelout);
+ texelout4);
}
else {
- lp_build_sample_general(&bld, unit,
- s, t, r,
- lod_iparts, lod_fpart,
+ lp_build_sample_general(&bld4, unit,
+ s4, t4, r4,
+ lod_iparts, lod_fparts,
ilevel0s, ilevel1s,
- texelout);
+ texelout4);
}
for (j = 0; j < 4; j++) {
- LLVMBuildStore(builder, texelout[j], texels[j]);
+ texelouttmp[j][i] = texelout4[j];
}
-
- lp_build_endif(&if_ctx);
}
for (j = 0; j < 4; j++) {
- texel_out[j] = LLVMBuildLoad(builder, texels[j], "");
+ texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
}
}
}