5 files changed, 81 insertions, 66 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 95541041e31..89d72494be0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -162,7 +162,8 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
       state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
    }
 
-   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
+       state->min_img_filter != state->mag_img_filter) {
       if (sampler->lod_bias != 0.0f) {
          state->lod_bias_non_zero = 1;
       }
@@ -669,9 +670,10 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
  * \param lod_bias  optional float vector with the shader lod bias
  * \param explicit_lod  optional float vector with the explicit lod
- * \param width  scalar int texture width
- * \param height  scalar int texture height
- * \param depth  scalar int texture depth
+ * \param cube_rho  rho calculated by cube coord mapping (optional)
+ * \param out_lod_ipart  integer part of lod
+ * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
+ * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
  *
  * The resulting lod is scalar per quad, so only the first value per quad
  * passed in from lod_bias, explicit_lod is used.
@@ -689,7 +691,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef explicit_lod, /* optional */
                       unsigned mip_filter,
                       LLVMValueRef *out_lod_ipart,
-                      LLVMValueRef *out_lod_fpart)
+                      LLVMValueRef *out_lod_fpart,
+                      LLVMValueRef *out_lod_positive)
 
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -697,8 +700,27 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
    LLVMValueRef lod;
 
    *out_lod_ipart = bld->leveli_bld.zero;
+   *out_lod_positive = bld->leveli_bld.zero;
    *out_lod_fpart = levelf_bld->zero;
 
+   /*
+    * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
+    * "Implementations may either unconditionally assume c = 0 for the minification
+    * vs. magnification switch-over point, or may choose to make c depend on the
+    * combination of minification and magnification modes as follows: if the
+    * magnification filter is given by LINEAR and the minification filter is given
+    * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
+    * done to ensure that a minified texture does not appear "sharper" than a
+    * magnified texture. Otherwise c = 0."
+    * And 3.9.11 Texture Minification:
+    * "If lod is less than or equal to the constant c (see section 3.9.12) the
+    * texture is said to be magnified; if it is greater, the texture is minified."
+    * So, using 0 as switchover point always, and using magnification for lod == 0.
+    * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
+    * old GL versions required 0.5 for the modes listed above.
+    * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
+    */
+
    if (bld->static_sampler_state->min_max_lod_equal) {
       /* User is forcing sampling from a particular mipmap level.
        * This is hit during mipmap generation.
@@ -739,21 +761,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
                /*
-                * FIXME: this is not entirely correct, as out_lod_ipart is used
-                * both for mip level determination as well as mag/min switchover
-                * point (if different min/mag filters are used). In particular,
-                * lod values between [-0.5,0] (rho between [sqrt(2), 1.0]) will
-                * incorrectly use min filter instead of mag (the non-optimized
-                * calculation further down has exactly the same problem).
+                * Don't actually need both all the time, ipart is needed
+                * for nearest mipfilter, pos_or_zero if min != mag.
                 */
                *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
-               *out_lod_fpart = levelf_bld->zero;
+               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
+                                                rho, levelf_bld->one);
                return;
             }
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
                lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
                                       out_lod_ipart, out_lod_fpart);
+               *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
+                                                rho, levelf_bld->one);
                return;
             }
          }
@@ -803,6 +824,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       }
    }
 
+   *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
+                                    lod, levelf_bld->zero);
+
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
          lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 067a995a2be..a7ebe7e9ed8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -388,7 +388,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef explicit_lod, /* optional */
                       unsigned mip_filter,
                       LLVMValueRef *out_lod_ipart,
-                      LLVMValueRef *out_lod_fpart);
+                      LLVMValueRef *out_lod_fpart,
+                      LLVMValueRef *out_lod_positive);
 
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index b9227b52fb4..7431388812d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1562,13 +1562,12 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     const LLVMValueRef *offsets,
-                    LLVMValueRef lod_ipart,
+                    LLVMValueRef lod_positive,
                     LLVMValueRef lod_fpart,
                     LLVMValueRef ilevel0,
                     LLVMValueRef ilevel1,
                     LLVMValueRef texel_out[4])
 {
-   struct lp_build_context *int_bld = &bld->int_bld;
    LLVMBuilderRef builder = bld->gallivm->builder;
    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
@@ -1608,26 +1607,20 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
        * depending on the lod being > 0 or <= 0, respectively.
        */
       struct lp_build_if_state if_ctx;
-      LLVMValueRef minify;
 
       /*
-       * XXX this should take all lods into account, if some are min
-       * some max probably could hack up the coords/weights in the linear
+       * FIXME this should take all lods into account, if some are min
+       * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
-       * If that's just two quads sitting next to each other it seems
-       * quite ok to do the same filtering method on both though, at
-       * least unless we have explicit lod (and who uses different
-       * min/mag filter with that?)
        */
-      if (bld->num_lods > 1)
-         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
-                                              lp_build_const_int32(bld->gallivm, 0), "");
+      if (bld->leveli_bld.type.length > 1)
+         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
+                                                lp_build_const_int32(bld->gallivm, 0), "");
 
-      /* minify = lod >= 0.0 */
-      minify = LLVMBuildICmp(builder, LLVMIntSGE,
-                             lod_ipart, int_bld->zero, "");
+      lod_positive = LLVMBuildTrunc(builder, lod_positive,
+                                    LLVMInt1TypeInContext(bld->gallivm->context), "");
 
-      lp_build_if(&if_ctx, bld->gallivm, minify);
+      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
       {
          /* Use the minification filter */
          lp_build_sample_mipmap(bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
index 6fce9712a48..da503d2dcc8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -47,7 +47,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     const LLVMValueRef *offsets,
-                    LLVMValueRef lod_ipart,
+                    LLVMValueRef lod_positive,
                     LLVMValueRef lod_fpart,
                     LLVMValueRef ilevel0,
                     LLVMValueRef ilevel1,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 743dd0a4501..8ad3b9f246a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1239,7 +1239,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                        const struct lp_derivatives *derivs, /* optional */
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
-                       LLVMValueRef *lod_ipart,
+                       LLVMValueRef *lod_pos_or_zero,
                        LLVMValueRef *lod_fpart,
                        LLVMValueRef *ilevel0,
                        LLVMValueRef *ilevel1)
@@ -1249,6 +1249,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
    const unsigned target = bld->static_texture_state->target;
    LLVMValueRef first_level, cube_rho = NULL;
+   LLVMValueRef lod_ipart = NULL;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1309,9 +1310,10 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                             coords[0], coords[1], coords[2], cube_rho,
                             derivs, lod_bias, explicit_lod,
                             mip_filter,
-                            lod_ipart, lod_fpart);
+                            &lod_ipart, lod_fpart, lod_pos_or_zero);
    } else {
-      *lod_ipart = bld->leveli_bld.zero;
+      lod_ipart = bld->leveli_bld.zero;
+      *lod_pos_or_zero = bld->leveli_bld.zero;
    }
 
    /*
@@ -1328,8 +1330,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
           * We should be able to set ilevel0 = const(0) but that causes
           * bad x86 code to be emitted.
           */
-         assert(*lod_ipart);
-         lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
+         assert(lod_ipart);
+         lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
       }
       else {
          first_level = bld->dynamic_state->first_level(bld->dynamic_state,
@@ -1339,14 +1341,14 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
       }
       break;
    case PIPE_TEX_MIPFILTER_NEAREST:
-      assert(*lod_ipart);
-      lp_build_nearest_mip_level(bld, texture_index, *lod_ipart, ilevel0, NULL);
+      assert(lod_ipart);
+      lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
       break;
    case PIPE_TEX_MIPFILTER_LINEAR:
-      assert(*lod_ipart);
+      assert(lod_ipart);
       assert(*lod_fpart);
       lp_build_linear_mip_levels(bld, texture_index,
-                                 *lod_ipart, lod_fpart,
+                                 lod_ipart, lod_fpart,
                                  ilevel0, ilevel1);
       break;
    }
@@ -1581,13 +1583,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                         unsigned sampler_unit,
                         LLVMValueRef *coords,
                         const LLVMValueRef *offsets,
-                        LLVMValueRef lod_ipart,
+                        LLVMValueRef lod_positive,
                         LLVMValueRef lod_fpart,
                         LLVMValueRef ilevel0,
                         LLVMValueRef ilevel1,
                         LLVMValueRef *colors_out)
 {
-   struct lp_build_context *int_bld = &bld->int_bld;
    LLVMBuilderRef builder = bld->gallivm->builder;
    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
    const unsigned mip_filter = sampler_state->min_mip_filter;
@@ -1634,26 +1635,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
        * depending on the lod being > 0 or <= 0, respectively.
        */
       struct lp_build_if_state if_ctx;
-      LLVMValueRef minify;
 
       /*
-       * XXX this should take all lods into account, if some are min
-       * some max probably could hack up the coords/weights in the linear
+       * FIXME this should take all lods into account, if some are min
+       * some max probably could hack up the weights in the linear
        * path with selects to work for nearest.
-       * If that's just two quads sitting next to each other it seems
-       * quite ok to do the same filtering method on both though, at
-       * least unless we have explicit lod (and who uses different
-       * min/mag filter with that?)
        */
-      if (bld->num_lods > 1)
-         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
-                                             lp_build_const_int32(bld->gallivm, 0), "");
+      if (bld->leveli_bld.type.length > 1)
+         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
+                                                lp_build_const_int32(bld->gallivm, 0), "");
 
-      /* minify = lod >= 0.0 */
-      minify = LLVMBuildICmp(builder, LLVMIntSGE,
-                             lod_ipart, int_bld->zero, "");
+      lod_positive = LLVMBuildTrunc(builder, lod_positive,
+                                    LLVMInt1TypeInContext(bld->gallivm->context), "");
 
-      lp_build_if(&if_ctx, bld->gallivm, minify);
+      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
       {
          /* Use the minification filter */
          lp_build_sample_mipmap(bld, sampler_unit,
@@ -1941,6 +1936,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    /*
     * There are other situations where at least the multiple int lods could be
     * avoided like min and max lod being equal.
+    * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
+    * have length 4. Because lod_selector is always using per quad calcs in this
+    * case, but minification etc. don't need to bother. This is very brittle though
+    * e.g. num_lods might be 1 but still have multiple positive_lod values!
     */
    if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
        (explicit_lod || lod_bias ||
@@ -2034,7 +2033,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
    }
 
    else {
-      LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
+      LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
                         /* not sure this is strictly needed or simply impossible */
@@ -2063,7 +2062,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
       lp_build_sample_common(&bld, texture_index, sampler_index,
                              newcoords,
                              derivs, lod_bias, explicit_lod,
-                             &lod_ipart, &lod_fpart,
+                             &lod_positive, &lod_fpart,
                              &ilevel0, &ilevel1);
 
       /*
@@ -2077,10 +2076,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
             if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
                LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
                /*
-                * These parameters are the same for all quads,
-                * could probably simplify.
+                * This parameter is the same for all quads could probably simplify.
                 */
-               lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, "");
                ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
             }
          }
@@ -2089,7 +2086,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
             lp_build_sample_aos(&bld, sampler_index,
                                 newcoords[0], newcoords[1],
                                 newcoords[2],
-                                offsets, lod_ipart, lod_fpart,
+                                offsets, lod_positive, lod_fpart,
                                 ilevel0, ilevel1,
                                 texel_out);
          }
@@ -2097,7 +2094,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          else {
             lp_build_sample_general(&bld, sampler_index,
                                     newcoords, offsets,
-                                    lod_ipart, lod_fpart,
+                                    lod_positive, lod_fpart,
                                     ilevel0, ilevel1,
                                     texel_out);
          }
@@ -2180,7 +2177,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
 
          for (i = 0; i < num_quads; i++) {
             LLVMValueRef s4, t4, r4;
-            LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
+            LLVMValueRef lod_positive4, lod_fpart4 = NULL;
             LLVMValueRef ilevel04, ilevel14 = NULL;
             LLVMValueRef offsets4[4] = { NULL };
             unsigned num_lods = bld4.num_lods;
@@ -2198,7 +2195,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                   }
                }
             }
-            lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
+            lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
             ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
@@ -2209,7 +2206,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                /* do sampling/filtering with fixed pt arithmetic */
                lp_build_sample_aos(&bld4, sampler_index,
                                    s4, t4, r4, offsets4,
-                                   lod_ipart4, lod_fpart4,
+                                   lod_positive4, lod_fpart4,
                                    ilevel04, ilevel14,
                                    texelout4);
             }
@@ -2225,7 +2222,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
 
                lp_build_sample_general(&bld4, sampler_index,
                                        newcoords4, offsets4,
-                                       lod_ipart4, lod_fpart4,
+                                       lod_positive4, lod_fpart4,
                                        ilevel04, ilevel14,
                                        texelout4);
             }