4 files changed, 169 insertions, 131 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 89d72494be0..e1cfd78e885 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
    struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *rho_bld = &bld->lodf_bld;
    const unsigned dims = bld->dims;
    LLVMValueRef ddx_ddy[2];
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    LLVMValueRef first_level, first_level_vec;
    unsigned length = coord_bld->type.length;
    unsigned num_quads = length / 4;
-   boolean rho_per_quad = levelf_bld->type.length != length;
+   boolean rho_per_quad = rho_bld->type.length != length;
    unsigned i;
    LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    LLVMValueRef rho_xvec, rho_yvec;
@@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld,
        */
       if (rho_per_quad) {
          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         levelf_bld->type, cube_rho, 0);
+                                         rho_bld->type, cube_rho, 0);
       }
       else {
          rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
       }
       if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(rho_bld, rho);
       }
       /* Could optimize this for single quad just skip the broadcast */
       cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
-                                            levelf_bld->type, float_size, index0);
-      rho = lp_build_mul(levelf_bld, cubesize, rho);
+                                            rho_bld->type, float_size, index0);
+      rho = lp_build_mul(rho_bld, cubesize, rho);
    }
    else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
       LLVMValueRef ddmax[3], ddx[3], ddy[3];
@@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld,
              * otherwise would also need different code to per-pixel lod case.
              */
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            rho_bld->type, rho, 0);
          }
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(rho_bld, rho);
 
       }
       else {
@@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
              * rho_vec contains per-pixel rho, convert to scalar per quad.
              */
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            rho_bld->type, rho, 0);
          }
       }
    }
@@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
 
          if (rho_per_quad) {
             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            rho_bld->type, rho, 0);
          }
          else {
             /*
@@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
              */
             rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
          }
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(rho_bld, rho);
       }
       else {
          ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
@@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
             }
             if (rho_per_quad) {
                rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                               levelf_bld->type, rho, 0);
+                                               rho_bld->type, rho, 0);
             }
             else {
                rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
@@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
                }
             }
             if (!rho_per_quad) {
-               rho = lp_build_broadcast_scalar(levelf_bld, rho);
+               rho = lp_build_broadcast_scalar(rho_bld, rho);
             }
          }
       }
@@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
  * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
  * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
  *
- * The resulting lod is scalar per quad, so only the first value per quad
- * passed in from lod_bias, explicit_lod is used.
+ * The resulting lod can be scalar per quad or be per element.
  */
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
@@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *lodf_bld = &bld->lodf_bld;
    LLVMValueRef lod;
 
-   *out_lod_ipart = bld->leveli_bld.zero;
-   *out_lod_positive = bld->leveli_bld.zero;
-   *out_lod_fpart = levelf_bld->zero;
+   *out_lod_ipart = bld->lodi_bld.zero;
+   *out_lod_positive = bld->lodi_bld.zero;
+   *out_lod_fpart = lodf_bld->zero;
 
    /*
     * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
@@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          bld->dynamic_state->min_lod(bld->dynamic_state,
                                      bld->gallivm, sampler_unit);
 
-      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+      lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
    }
    else {
       if (explicit_lod) {
          if (bld->num_lods != bld->coord_type.length)
             lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                            levelf_bld->type, explicit_lod, 0);
+                                            lodf_bld->type, explicit_lod, 0);
          else
             lod = explicit_lod;
       }
@@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                 * Don't actually need both all the time, ipart is needed
                 * for nearest mipfilter, pos_or_zero if min != mag.
                 */
-               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
-               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
-                                                rho, levelf_bld->one);
+               *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
+               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+                                                rho, lodf_bld->one);
                return;
             }
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
+               lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
                                       out_lod_ipart, out_lod_fpart);
-               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
-                                                rho, levelf_bld->one);
+               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+                                                rho, lodf_bld->one);
                return;
             }
          }
 
          if (0) {
-            lod = lp_build_log2(levelf_bld, rho);
+            lod = lp_build_log2(lodf_bld, rho);
          }
          else {
-            lod = lp_build_fast_log2(levelf_bld, rho);
+            lod = lp_build_fast_log2(lodf_bld, rho);
          }
 
          /* add shader lod bias */
          if (lod_bias) {
             if (bld->num_lods != bld->coord_type.length)
                lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                                    levelf_bld->type, lod_bias, 0);
+                                                    lodf_bld->type, lod_bias, 0);
             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
          }
       }
@@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          LLVMValueRef sampler_lod_bias =
             bld->dynamic_state->lod_bias(bld->dynamic_state,
                                          bld->gallivm, sampler_unit);
-         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
+         sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
                                                       sampler_lod_bias);
          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
       }
@@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
          LLVMValueRef max_lod =
             bld->dynamic_state->max_lod(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
+         max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
 
-         lod = lp_build_min(levelf_bld, lod, max_lod);
+         lod = lp_build_min(lodf_bld, lod, max_lod);
       }
       if (bld->static_sampler_state->apply_min_lod) {
          LLVMValueRef min_lod =
             bld->dynamic_state->min_lod(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+         min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
 
-         lod = lp_build_max(levelf_bld, lod, min_lod);
+         lod = lp_build_max(lodf_bld, lod, min_lod);
       }
    }
 
-   *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
-                                    lod, levelf_bld->zero);
+   *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+                                    lod, lodf_bld->zero);
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
+         lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
                                 out_lod_ipart, out_lod_fpart);
       }
       else {
-         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
+         lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
       }
 
       lp_build_name(*out_lod_fpart, "lod_fpart");
    }
    else {
-      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
+      *out_lod_ipart = lp_build_iround(lodf_bld, lod);
    }
 
    lp_build_name(*out_lod_ipart, "lod_ipart");
@@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
       out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
       out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
       out = lp_build_or(leveli_bld, out, out1);
-      if (bld->num_lods == bld->coord_bld.type.length) {
+      if (bld->num_mips == bld->coord_bld.type.length) {
          *out_of_bounds = out;
       }
-      else if (bld->num_lods == 1) {
+      else if (bld->num_mips == 1) {
          *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
       }
       else {
-         assert(bld->num_lods == bld->coord_bld.type.length / 4);
+         assert(bld->num_mips == bld->coord_bld.type.length / 4);
          *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                 leveli_bld->type,
                                                                 bld->int_coord_bld.type,
@@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 
 
 /**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two (per-quad)
- * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
+ * to two (adjacent) mipmap level indexes at that same granularity, and fix up
+ * the float lod part accordingly.
  * Later, we'll sample from those two mipmap levels and interpolate between them.
  */
 void
@@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
    LLVMValueRef clamp_min;
    LLVMValueRef clamp_max;
 
+   assert(bld->num_lods == bld->num_mips);
+
    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                  bld->gallivm, texture_unit);
    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
@@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
    LLVMValueRef indexes[2], offsets, offset1;
 
    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       indexes[1] = level;
       offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
       offset1 = LLVMBuildLoad(builder, offset1, "");
       offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
    }
-   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
       unsigned i;
 
       offsets = bld->int_coord_bld.undef;
-      for (i = 0; i < bld->num_lods; i++) {
+      for (i = 0; i < bld->num_mips; i++) {
          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
@@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
    else {
       unsigned i;
 
-      assert (bld->num_lods == bld->coord_bld.type.length);
+      assert (bld->num_mips == bld->coord_bld.type.length);
 
       offsets = bld->int_coord_bld.undef;
-      for (i = 0; i < bld->num_lods; i++) {
+      for (i = 0; i < bld->num_mips; i++) {
          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
          offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
@@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMValueRef indexes[2], stride, stride1;
    indexes[0] = lp_build_const_int32(bld->gallivm, 0);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       indexes[1] = level;
       stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
       stride1 = LLVMBuildLoad(builder, stride1, "");
       stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
    }
-   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+   else if (bld->num_mips == bld->coord_bld.type.length / 4) {
       LLVMValueRef stride1;
       unsigned i;
 
       stride = bld->int_coord_bld.undef;
-      for (i = 0; i < bld->num_lods; i++) {
+      for (i = 0; i < bld->num_mips; i++) {
          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
          indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
@@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
       LLVMValueRef stride1;
       unsigned i;
 
-      assert (bld->num_lods == bld->coord_bld.type.length);
+      assert (bld->num_mips == bld->coord_bld.type.length);
 
       stride = bld->int_coord_bld.undef;
       for (i = 0; i < bld->coord_bld.type.length; i++) {
@@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
    /*
     * Compute width, height, depth at mipmap level 'ilevel'
     */
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
       *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
    }
@@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
       unsigned num_quads = bld->coord_bld.type.length / 4;
       unsigned i;
 
-      if (bld->num_lods == num_quads) {
+      if (bld->num_mips == num_quads) {
          /*
           * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
           * intel "forgot" the variable shift count instruction until avx2.
@@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
          */
-         assert(bld->num_lods == bld->coord_bld.type.length);
+         assert(bld->num_mips == bld->coord_bld.type.length);
          if (bld->dims == 1) {
             assert(bld->int_size_in_bld.type.length == 1);
             int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
@@ -1226,7 +1228,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
          }
          else {
             LLVMValueRef ilevel1;
-            for (i = 0; i < bld->num_lods; i++) {
+            for (i = 0; i < bld->num_mips; i++) {
                LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
                ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
                                                     bld->int_size_in_bld.type, ilevel, indexi);
@@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
             }
             *out_size = lp_build_concat(bld->gallivm, tmp,
                                         bld->int_size_in_bld.type,
-                                        bld->num_lods);
+                                        bld->num_mips);
          }
       }
    }
@@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
    struct lp_type size_type = size_bld->type;
 
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       *out_width = lp_build_extract_broadcast(bld->gallivm,
                                               size_type,
                                               coord_type,
@@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
       if (dims == 1) {
          *out_width = size;
       }
-      else if (bld->num_lods == num_quads) {
+      else if (bld->num_mips == num_quads) {
          *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
          if (dims >= 2) {
             *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
@@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
          }
       }
       else {
-         assert(bld->num_lods == bld->coord_type.length);
+         assert(bld->num_mips == bld->coord_type.length);
          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                 coord_type, size, 0);
          if (dims >= 2) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index a7ebe7e9ed8..e6b9f30d7bb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -233,7 +233,10 @@ struct lp_build_sample_context
    /** SIMD vector width */
    unsigned vector_width;
 
-   /** number of lod values (valid are 1, length/4, length) */
+   /** number of mipmaps (valid are 1, length/4, length) */
+   unsigned num_mips;
+
+   /** number of lod values (valid are 1, length/4, length) */
    unsigned num_lods;
 
    /** regular scalar float type */
@@ -283,6 +286,14 @@ struct lp_build_sample_context
    struct lp_type leveli_type;
    struct lp_build_context leveli_bld;
 
+   /** Float lod type */
+   struct lp_type lodf_type;
+   struct lp_build_context lodf_bld;
+
+   /** Int lod type */
+   struct lp_type lodi_type;
+   struct lp_build_context lodi_bld;
+
    /* Common dynamic state values */
    LLVMValueRef row_stride_array;
    LLVMValueRef img_stride_array;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 7431388812d..c35b628270e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    lp_build_mipmap_level_sizes(bld, ilevel0,
                                &size0,
                                &row_stride0_vec, &img_stride0_vec);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
    }
    else {
@@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
-                                                     bld->levelf_bld.type, 256.0);
-      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
+                                                     bld->lodf_bld.type, 256.0);
+      LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
       struct lp_build_if_state if_ctx;
       LLVMValueRef need_lerp;
       unsigned num_quads = bld->coord_bld.type.length / 4;
@@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
       /* need_lerp = lod_fpart > 0 */
       if (bld->num_lods == 1) {
          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
-                                   lod_fpart, bld->leveli_bld.zero,
+                                   lod_fpart, bld->lodi_bld.zero,
                                    "need_lerp");
       }
       else {
@@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * lod_fpart values have same sign.
           * We can however then skip the greater than comparison.
           */
-         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
-                                  bld->leveli_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
+         lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
+                                  bld->lodi_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
       }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          lp_build_mipmap_level_sizes(bld, ilevel1,
                                      &size1,
                                      &row_stride1_vec, &img_stride1_vec);
-         if (bld->num_lods == 1) {
+         if (bld->num_mips == 1) {
             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
          }
          else {
@@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          }
          else {
             unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
-            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
+            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
 
             /* Take the LSB of lod_fpart */
@@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
        * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
        */
-      if (bld->leveli_bld.type.length > 1)
+      if (bld->num_lods > 1)
          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
                                                 lp_build_const_int32(bld->gallivm, 0), "");
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 8ad3b9f246a..c686d82de57 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    lp_build_mipmap_level_sizes(bld, ilevel0,
                                &size0,
                                &row_stride0_vec, &img_stride0_vec);
-   if (bld->num_lods == 1) {
+   if (bld->num_mips == 1) {
       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
    }
    else {
@@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
       /* need_lerp = lod_fpart > 0 */
       if (bld->num_lods == 1) {
          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
-                                   lod_fpart, bld->levelf_bld.zero,
+                                   lod_fpart, bld->lodf_bld.zero,
                                    "need_lerp");
       }
       else {
@@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
           * negative values which would screw up filtering if not all
           * lod_fpart values have same sign.
           */
-         lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
-                                  bld->levelf_bld.zero);
-         need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
+         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
+                                  bld->lodf_bld.zero);
+         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                       PIPE_FUNC_GREATER,
-                                      lod_fpart, bld->levelf_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
+                                      lod_fpart, bld->lodf_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
       }
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          lp_build_mipmap_level_sizes(bld, ilevel1,
                                      &size1,
                                      &row_stride1_vec, &img_stride1_vec);
-         if (bld->num_lods == 1) {
+         if (bld->num_mips == 1) {
             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
          }
          else {
@@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
          if (bld->num_lods != bld->coord_type.length)
             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
-                                                              bld->levelf_bld.type,
+                                                              bld->lodf_bld.type,
                                                               bld->texel_bld.type,
                                                               lod_fpart);
 
@@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                             mip_filter,
                             &lod_ipart, lod_fpart, lod_pos_or_zero);
    } else {
-      lod_ipart = bld->leveli_bld.zero;
-      *lod_pos_or_zero = bld->leveli_bld.zero;
+      lod_ipart = bld->lodi_bld.zero;
+      *lod_pos_or_zero = bld->lodi_bld.zero;
+   }
+
+   if (bld->num_lods != bld->num_mips) {
+      /* only makes sense if there's just a single mip level */
+      assert(bld->num_mips == 1);
+      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
    }
 
    /*
@@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
        * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
        */
-      if (bld->leveli_bld.type.length > 1)
+      if (bld->num_lods > 1)
          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
                                                 lp_build_const_int32(bld->gallivm, 0), "");
 
@@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                      const LLVMValueRef *offsets,
                      LLVMValueRef *colors_out)
 {
-   struct lp_build_context *perquadi_bld = &bld->leveli_bld;
+   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    unsigned dims = bld->dims, chan;
    unsigned target = bld->static_texture_state->target;
@@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
    out_of_bounds = int_coord_bld->zero;
 
    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
-      if (bld->num_lods != int_coord_bld->type.length) {
+      if (bld->num_mips != int_coord_bld->type.length) {
          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                             perquadi_bld->type, explicit_lod, 0);
       }
@@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
    }
    else {
-      assert(bld->num_lods == 1);
+      assert(bld->num_mips == 1);
       if (bld->static_texture_state->target != PIPE_BUFFER) {
          ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
                                                   bld->gallivm, texture_unit);
@@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    unsigned target = static_texture_state->target;
    unsigned dims = texture_dims(target);
    unsigned num_quads = type.length / 4;
-   unsigned mip_filter, i;
+   unsigned mip_filter, min_img_filter, mag_img_filter, i;
    struct lp_build_sample_context bld;
    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
@@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
    }
 
+   min_img_filter = static_sampler_state->min_img_filter;
+   mag_img_filter = static_sampler_state->mag_img_filter;
+
+
    /*
     * This is all a bit complicated different paths are chosen for performance
     * reasons.
@@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    /*
     * There are other situations where at least the multiple int lods could be
     * avoided like min and max lod being equal.
-    * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
-    * have length 4. Because lod_selector is always using per quad calcs in this
-    * case, but minification etc. don't need to bother. This is very brittle though
-    * e.g. num_lods might be 1 but still have multiple positive_lod values!
     */
+   bld.num_mips = bld.num_lods = 1;
    if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
        (explicit_lod || lod_bias ||
-        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
-       ((is_fetch && target != PIPE_BUFFER) ||
-        (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
-      bld.num_lods = type.length;
+        (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+      if ((is_fetch && target != PIPE_BUFFER) ||
+          (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+         bld.num_mips = type.length;
+         bld.num_lods = type.length;
+      }
+      else if (!is_fetch && min_img_filter != mag_img_filter) {
+         bld.num_mips = 1;
+         bld.num_lods = type.length;
+      }
+   }
    /* TODO: for true scalar_lod should only use 1 lod value */
-   else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
+   else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
             (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+      bld.num_mips = num_quads;
       bld.num_lods = num_quads;
    }
-   else {
-      bld.num_lods = 1;
+   else if (!is_fetch && min_img_filter != mag_img_filter) {
+      bld.num_mips = 1;
+      bld.num_lods = num_quads;
    }
 
-   bld.levelf_type = type;
+
+   bld.lodf_type = type;
    /* we want native vector size to be able to use our intrinsics */
    if (bld.num_lods != type.length) {
-      bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+      /* TODO: this currently always has to be per-quad or per-element */
+      bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+   }
+   bld.lodi_type = lp_int_type(bld.lodf_type);
+   bld.levelf_type = bld.lodf_type;
+   if (bld.num_mips == 1) {
+      bld.levelf_type.length = 1;
    }
    bld.leveli_type = lp_int_type(bld.levelf_type);
    bld.float_size_type = bld.float_size_in_type;
    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
-   if (bld.num_lods > 1) {
-      bld.float_size_type.length = bld.num_lods == type.length ?
-                                      bld.num_lods * bld.float_size_in_type.length :
+   if (bld.num_mips > 1) {
+      bld.float_size_type.length = bld.num_mips == type.length ?
+                                      bld.num_mips * bld.float_size_in_type.length :
                                       type.length;
    }
    bld.int_size_type = lp_int_type(bld.float_size_type);
@@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
+   lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
+   lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
 
    /* Get the dynamic state */
    tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
@@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
        * (It should be faster if we'd support avx2)
        */
       if (num_quads == 1 || !use_aos) {
-
-         if (num_quads > 1) {
-            if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-               LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
-               /*
-                * This parameter is the same for all quads could probably simplify.
-                */
-               ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
-            }
-         }
          if (use_aos) {
             /* do sampling/filtering with fixed pt arithmetic */
             lp_build_sample_aos(&bld, sampler_index,
@@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
          bld4.texel_type = bld.texel_type;
          bld4.texel_type.length = 4;
-         bld4.levelf_type = type4;
-         /* we want native vector size to be able to use our intrinsics */
-         bld4.levelf_type.length = 1;
-         bld4.leveli_type = lp_int_type(bld4.levelf_type);
 
+         bld4.num_mips = bld4.num_lods = 1;
          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
-               (explicit_lod || lod_bias ||
-                (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
-               ((is_fetch && target != PIPE_BUFFER) ||
-                (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
-            bld4.num_lods = type4.length;
-         else
-            bld4.num_lods = 1;
+             (explicit_lod || lod_bias ||
+              (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+            if ((is_fetch && target != PIPE_BUFFER) ||
+                (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+               bld4.num_mips = type4.length;
+               bld4.num_lods = type4.length;
+            }
+            else if (!is_fetch && min_img_filter != mag_img_filter) {
+               bld4.num_mips = 1;
+               bld4.num_lods = type4.length;
+            }
+         }
 
-         bld4.levelf_type = type4;
          /* we want native vector size to be able to use our intrinsics */
+         bld4.lodf_type = type4;
          if (bld4.num_lods != type4.length) {
+            bld4.lodf_type.length = 1;
+         }
+         bld4.lodi_type = lp_int_type(bld4.lodf_type);
+         bld4.levelf_type = type4;
+         if (bld4.num_mips != type4.length) {
             bld4.levelf_type.length = 1;
          }
          bld4.leveli_type = lp_int_type(bld4.levelf_type);
          bld4.float_size_type = bld4.float_size_in_type;
-         if (bld4.num_lods > 1) {
-            bld4.float_size_type.length = bld4.num_lods == type4.length ?
-                                            bld4.num_lods * bld4.float_size_in_type.length :
+         if (bld4.num_mips > 1) {
+            bld4.float_size_type.length = bld4.num_mips == type4.length ?
+                                            bld4.num_mips * bld4.float_size_in_type.length :
                                             type4.length;
          }
          bld4.int_size_type = lp_int_type(bld4.float_size_type);
@@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
+         lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
+         lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
 
          for (i = 0; i < num_quads; i++) {
             LLVMValueRef s4, t4, r4;
@@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                }
             }
             lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
-            ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
+            ilevel04 = bld.num_mips == 1 ? ilevel0 :
+                          lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);