4 files changed, 184 insertions, 240 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8f8410c015b..3291ec40af5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -974,7 +974,7 @@ lp_build_lerp_simple(struct lp_build_context *bld,
                      LLVMValueRef x,
                      LLVMValueRef v0,
                      LLVMValueRef v1,
-                     bool normalized)
+                     unsigned flags)
 {
    unsigned half_width = bld->type.width/2;
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -987,14 +987,17 @@ lp_build_lerp_simple(struct lp_build_context *bld,
 
    delta = lp_build_sub(bld, v1, v0);
 
-   if (normalized) {
+   if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
       if (!bld->type.sign) {
-         /*
-          * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
-          * most-significant-bit to the lowest-significant-bit, so that
-          * later we can just divide by 2**n instead of 2**n - 1.
-          */
-         x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+         if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) {
+            /*
+             * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
+             * most-significant-bit to the lowest-significant-bit, so that
+             * later we can just divide by 2**n instead of 2**n - 1.
+             */
+
+            x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+         }
 
          /* (x * delta) >> n */
          res = lp_build_mul(bld, x, delta);
@@ -1005,15 +1008,18 @@ lp_build_lerp_simple(struct lp_build_context *bld,
           * use the 2**n - 1 divison approximation in lp_build_mul_norm
           * instead.
           */
+         assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
          res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
       }
    } else {
+      assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS));
       res = lp_build_mul(bld, x, delta);
    }
 
    res = lp_build_add(bld, v0, res);
 
-   if ((normalized && !bld->type.sign) || bld->type.fixed) {
+   if (((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) ||
+       bld->type.fixed) {
       /* We need to mask out the high order bits when lerping 8bit normalized colors stored on 16bits */
       /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
        * but it will be wrong for true fixed point use cases. Basically we need
@@ -1033,7 +1039,8 @@ LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
               LLVMValueRef x,
               LLVMValueRef v0,
-              LLVMValueRef v1)
+              LLVMValueRef v1,
+              unsigned flags)
 {
    const struct lp_type type = bld->type;
    LLVMValueRef res;
@@ -1042,6 +1049,8 @@ lp_build_lerp(struct lp_build_context *bld,
    assert(lp_check_value(type, v0));
    assert(lp_check_value(type, v1));
 
+   assert(!(flags & LP_BLD_LERP_WIDE_NORMALIZED));
+
    if (type.norm) {
       struct lp_type wide_type;
       struct lp_build_context wide_bld;
@@ -1068,18 +1077,25 @@ lp_build_lerp(struct lp_build_context *bld,
        * Lerp both halves.
        */
 
-      resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, TRUE);
-      resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, TRUE);
+      flags |= LP_BLD_LERP_WIDE_NORMALIZED;
+
+      resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, flags);
+      resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, flags);
 
       res = lp_build_pack2(bld->gallivm, wide_type, type, resl, resh);
    } else {
-      res = lp_build_lerp_simple(bld, x, v0, v1, FALSE);
+      res = lp_build_lerp_simple(bld, x, v0, v1, flags);
    }
 
    return res;
 }
 
 
+/**
+ * Bilinear interpolation.
+ *
+ * Values indices are in v_{yx}.
+ */
 LLVMValueRef
 lp_build_lerp_2d(struct lp_build_context *bld,
                  LLVMValueRef x,
@@ -1087,11 +1103,12 @@ lp_build_lerp_2d(struct lp_build_context *bld,
                  LLVMValueRef v00,
                  LLVMValueRef v01,
                  LLVMValueRef v10,
-                 LLVMValueRef v11)
+                 LLVMValueRef v11,
+                 unsigned flags)
 {
-   LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
-   LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
-   return lp_build_lerp(bld, y, v0, v1);
+   LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01, flags);
+   LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11, flags);
+   return lp_build_lerp(bld, y, v0, v1, flags);
 }
 
 
@@ -1107,11 +1124,12 @@ lp_build_lerp_3d(struct lp_build_context *bld,
                  LLVMValueRef v100,
                  LLVMValueRef v101,
                  LLVMValueRef v110,
-                 LLVMValueRef v111)
+                 LLVMValueRef v111,
+                 unsigned flags)
 {
-   LLVMValueRef v0 = lp_build_lerp_2d(bld, x, y, v000, v001, v010, v011);
-   LLVMValueRef v1 = lp_build_lerp_2d(bld, x, y, v100, v101, v110, v111);
-   return lp_build_lerp(bld, z, v0, v1);
+   LLVMValueRef v0 = lp_build_lerp_2d(bld, x, y, v000, v001, v010, v011, flags);
+   LLVMValueRef v1 = lp_build_lerp_2d(bld, x, y, v100, v101, v110, v111, flags);
+   return lp_build_lerp(bld, z, v0, v1, flags);
 }
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 45886d5fd99..966796c3c4d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -85,17 +85,27 @@ lp_build_div(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+
+/**
+ * Set when the weights for normalized are prescaled, that is, in range
+ * 0..2**n, as opposed to range 0..2**(n-1).
+ */
+#define LP_BLD_LERP_PRESCALED_WEIGHTS (1 << 0)
+
+/**
+ * Used internally when using wide intermediates for normalized lerps.
+ *
+ * Do not use.
+ */
+#define LP_BLD_LERP_WIDE_NORMALIZED (1 << 1)
+
 LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
               LLVMValueRef x,
               LLVMValueRef v0,
-              LLVMValueRef v1);
+              LLVMValueRef v1,
+              unsigned flags);
 
-/**
- * Bilinear interpolation.
- *
- * Values indices are in v_{yx}.
- */
 LLVMValueRef
 lp_build_lerp_2d(struct lp_build_context *bld,
                  LLVMValueRef x,
@@ -103,7 +113,8 @@ lp_build_lerp_2d(struct lp_build_context *bld,
                  LLVMValueRef v00,
                  LLVMValueRef v01,
                  LLVMValueRef v10,
-                 LLVMValueRef v11);
+                 LLVMValueRef v11,
+                 unsigned flags);
 
 LLVMValueRef
 lp_build_lerp_3d(struct lp_build_context *bld,
@@ -117,7 +128,8 @@ lp_build_lerp_3d(struct lp_build_context *bld,
                  LLVMValueRef v100,
                  LLVMValueRef v101,
                  LLVMValueRef v110,
-                 LLVMValueRef v111);
+                 LLVMValueRef v111,
+                 unsigned flags);
 
 
 LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 9eaca029fda..c31b05d7022 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -496,8 +496,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                     LLVMValueRef offset,
                                     LLVMValueRef x_subcoord,
                                     LLVMValueRef y_subcoord,
-                                    LLVMValueRef *colors_lo,
-                                    LLVMValueRef *colors_hi)
+                                    LLVMValueRef *colors)
 {
    /*
     * Fetch the pixels as 4 x 32bit (rgba order might differ):
@@ -517,10 +516,9 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
     */
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMValueRef rgba8;
-   struct lp_build_context h16, u8n;
+   struct lp_build_context u8n;
    LLVMTypeRef u8n_vec_type;
 
-   lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
 
@@ -546,10 +544,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                       y_subcoord);
    }
 
-   /* Expand one 4*rgba8 to two 2*rgba16 */
-   lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
-                    rgba8,
-                    colors_lo, colors_hi);
+   *colors = rgba8;
 }
 
 
@@ -569,8 +564,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                               LLVMValueRef t,
                               LLVMValueRef r,
                               const LLVMValueRef *offsets,
-                              LLVMValueRef *colors_lo,
-                              LLVMValueRef *colors_hi)
+                              LLVMValueRef *colors)
 {
    const unsigned dims = bld->dims;
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -694,7 +688,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 
    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                        x_subcoord, y_subcoord,
-                                       colors_lo, colors_hi);
+                                       colors);
 }
 
 
@@ -716,8 +710,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                      LLVMValueRef t,
                                      LLVMValueRef r,
                                      const LLVMValueRef *offsets,
-                                     LLVMValueRef *colors_lo,
-                                     LLVMValueRef *colors_hi)
+                                     LLVMValueRef *colors)
    {
    const unsigned dims = bld->dims;
    LLVMValueRef width_vec, height_vec, depth_vec;
@@ -787,7 +780,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
 
    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                        x_subcoord, y_subcoord,
-                                       colors_lo, colors_hi);
+                                       colors);
 }
 
 
@@ -804,29 +797,21 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                    LLVMValueRef s_fpart,
                                    LLVMValueRef t_fpart,
                                    LLVMValueRef r_fpart,
-                                   LLVMValueRef *colors_lo,
-                                   LLVMValueRef *colors_hi)
+                                   LLVMValueRef *colors)
 {
    const unsigned dims = bld->dims;
    LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context h16, u8n;
-   LLVMTypeRef h16_vec_type, u8n_vec_type;
+   struct lp_build_context u8n;
+   LLVMTypeRef u8n_vec_type;
    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
-   LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef shuffle_lo, shuffle_hi;
-   LLVMValueRef s_fpart_lo, s_fpart_hi;
-   LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL;
-   LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL;
-   LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
-   LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
-   LLVMValueRef packed_lo, packed_hi;
+   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef shuffle;
+   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
+   LLVMValueRef packed;
    unsigned i, j, k;
    unsigned numj, numk;
 
-   lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
-   h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
 
    /*
@@ -834,59 +819,45 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
     *
     *   s_fpart = {s0, s1, s2, s3}
     *
-    * into 8 x i16
-    *
-    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+    * where each value is between 0 and 0xff,
     *
-    * into two 8 x i16
+    * into one 16 x i20
     *
-    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
-    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
     *
     * and likewise for t_fpart. There is no risk of loosing precision here
     * since the fractional parts only use the lower 8bits.
     */
-   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
    if (dims >= 2)
-      t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
    if (dims >= 3)
-      r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
+      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
 
-   for (j = 0; j < h16.type.length; j += 4) {
+   for (j = 0; j < u8n.type.length; j += 4) {
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
       unsigned subindex = 0;
 #else
-      unsigned subindex = 1;
+      unsigned subindex = 3;
 #endif
       LLVMValueRef index;
 
-      index = LLVMConstInt(elem_type, j/2 + subindex, 0);
-      for (i = 0; i < 4; ++i)
-         shuffles_lo[j + i] = index;
-
-      index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+      index = LLVMConstInt(elem_type, j + subindex, 0);
       for (i = 0; i < 4; ++i)
-         shuffles_hi[j + i] = index;
+         shuffles[j + i] = index;
    }
 
-   shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
-   shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+   shuffle = LLVMConstVector(shuffles, u8n.type.length);
 
-   s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
-                                       shuffle_lo, "");
-   s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
-                                       shuffle_hi, "");
+   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
+                                    shuffle, "");
    if (dims >= 2) {
-      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
-                                          shuffle_lo, "");
-      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
-                                          shuffle_hi, "");
+      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
+                                       shuffle, "");
    }
    if (dims >= 3) {
-      r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
-                                          shuffle_lo, "");
-      r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
-                                          shuffle_hi, "");
+      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
+                                       shuffle, "");
    }
 
    /*
@@ -935,10 +906,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                                y_subcoord[j]);
             }
 
-            /* Expand one 4*rgba8 to two 2*rgba16 */
-            lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
-                             rgba8,
-                             &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
+            neighbors[k][j][i] = rgba8;
          }
       }
    }
@@ -948,84 +916,55 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
     */
    if (bld->static_sampler_state->force_nearest_s) {
       /* special case 1-D lerp */
-      packed_lo = lp_build_lerp(&h16,
-                                t_fpart_lo,
-                                neighbors_lo[0][0][0],
-                                neighbors_lo[0][0][1]);
-
-      packed_hi = lp_build_lerp(&h16,
-                                t_fpart_hi,
-                                neighbors_hi[0][1][0],
-                                neighbors_hi[0][1][0]);
+      packed = lp_build_lerp(&u8n,
+                             t_fpart,
+                             neighbors[0][0][0],
+                             neighbors[0][0][1],
+                             LP_BLD_LERP_PRESCALED_WEIGHTS);
    }
    else if (bld->static_sampler_state->force_nearest_t) {
       /* special case 1-D lerp */
-      packed_lo = lp_build_lerp(&h16,
-                                s_fpart_lo,
-                                neighbors_lo[0][0][0],
-                                neighbors_lo[0][0][1]);
-
-      packed_hi = lp_build_lerp(&h16,
-                                s_fpart_hi,
-                                neighbors_hi[0][0][0],
-                                neighbors_hi[0][0][1]);
+      packed = lp_build_lerp(&u8n,
+                             s_fpart,
+                             neighbors[0][0][0],
+                             neighbors[0][0][1],
+                             LP_BLD_LERP_PRESCALED_WEIGHTS);
    }
    else {
       /* general 1/2/3-D lerping */
       if (dims == 1) {
-         packed_lo = lp_build_lerp(&h16,
-                                   s_fpart_lo,
-                                   neighbors_lo[0][0][0],
-                                   neighbors_lo[0][0][1]);
-
-         packed_hi = lp_build_lerp(&h16,
-                                   s_fpart_hi,
-                                   neighbors_hi[0][0][0],
-                                   neighbors_hi[0][0][1]);
+         packed = lp_build_lerp(&u8n,
+                                s_fpart,
+                                neighbors[0][0][0],
+                                neighbors[0][0][1],
+                                LP_BLD_LERP_PRESCALED_WEIGHTS);
       } else if (dims == 2) {
          /* 2-D lerp */
-         packed_lo = lp_build_lerp_2d(&h16,
-                                      s_fpart_lo, t_fpart_lo,
-                                      neighbors_lo[0][0][0],
-                                      neighbors_lo[0][0][1],
-                                      neighbors_lo[0][1][0],
-                                      neighbors_lo[0][1][1]);
-
-         packed_hi = lp_build_lerp_2d(&h16,
-                                      s_fpart_hi, t_fpart_hi,
-                                      neighbors_hi[0][0][0],
-                                      neighbors_hi[0][0][1],
-                                      neighbors_hi[0][1][0],
-                                      neighbors_hi[0][1][1]);
+         packed = lp_build_lerp_2d(&u8n,
+                                   s_fpart, t_fpart,
+                                   neighbors[0][0][0],
+                                   neighbors[0][0][1],
+                                   neighbors[0][1][0],
+                                   neighbors[0][1][1],
+                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
       } else {
          /* 3-D lerp */
          assert(dims == 3);
-         packed_lo = lp_build_lerp_3d(&h16,
-                                      s_fpart_lo, t_fpart_lo, r_fpart_lo,
-                                      neighbors_lo[0][0][0],
-                                      neighbors_lo[0][0][1],
-                                      neighbors_lo[0][1][0],
-                                      neighbors_lo[0][1][1],
-                                      neighbors_lo[1][0][0],
-                                      neighbors_lo[1][0][1],
-                                      neighbors_lo[1][1][0],
-                                      neighbors_lo[1][1][1]);
-
-         packed_hi = lp_build_lerp_3d(&h16,
-                                      s_fpart_hi, t_fpart_hi, r_fpart_hi,
-                                      neighbors_hi[0][0][0],
-                                      neighbors_hi[0][0][1],
-                                      neighbors_hi[0][1][0],
-                                      neighbors_hi[0][1][1],
-                                      neighbors_hi[1][0][0],
-                                      neighbors_hi[1][0][1],
-                                      neighbors_hi[1][1][0],
-                                      neighbors_hi[1][1][1]);
+         packed = lp_build_lerp_3d(&u8n,
+                                   s_fpart, t_fpart, r_fpart,
+                                   neighbors[0][0][0],
+                                   neighbors[0][0][1],
+                                   neighbors[0][1][0],
+                                   neighbors[0][1][1],
+                                   neighbors[1][0][0],
+                                   neighbors[1][0][1],
+                                   neighbors[1][1][0],
+                                   neighbors[1][1][1],
+                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
       }
    }
 
-   *colors_lo = packed_lo;
-   *colors_hi = packed_hi;
+   *colors = packed;
 }
 
 /**
@@ -1043,8 +982,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
-                             LLVMValueRef *colors_lo,
-                             LLVMValueRef *colors_hi)
+                             LLVMValueRef *colors)
 {
    const unsigned dims = bld->dims;
    LLVMBuilderRef builder = bld->gallivm->builder;
@@ -1223,7 +1161,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       s_fpart, t_fpart, r_fpart,
-                                      colors_lo, colors_hi);
+                                      colors);
 }
 
 
@@ -1244,8 +1182,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     const LLVMValueRef *offsets,
-                                    LLVMValueRef *colors_lo,
-                                    LLVMValueRef *colors_hi)
+                                    LLVMValueRef *colors)
 {
    const unsigned dims = bld->dims;
    LLVMValueRef width_vec, height_vec, depth_vec;
@@ -1395,7 +1332,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       s_fpart, t_fpart, r_fpart,
-                                      colors_lo, colors_hi);
+                                      colors);
 }
 
 
@@ -1416,8 +1353,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                        LLVMValueRef ilevel0,
                        LLVMValueRef ilevel1,
                        LLVMValueRef lod_fpart,
-                       LLVMValueRef colors_lo_var,
-                       LLVMValueRef colors_hi_var)
+                       LLVMValueRef colors_var)
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMValueRef size0;
@@ -1430,8 +1366,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
    LLVMValueRef data_ptr1;
    LLVMValueRef mipoff0 = NULL;
    LLVMValueRef mipoff1 = NULL;
-   LLVMValueRef colors0_lo, colors0_hi;
-   LLVMValueRef colors1_lo, colors1_hi;
+   LLVMValueRef colors0;
+   LLVMValueRef colors1;
 
    /* sample the first mipmap level */
    lp_build_mipmap_level_sizes(bld, ilevel0,
@@ -1452,7 +1388,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                                               size0,
                                               row_stride0_vec, img_stride0_vec,
                                               data_ptr0, mipoff0, s, t, r, offsets,
-                                              &colors0_lo, &colors0_hi);
+                                              &colors0);
       }
       else {
          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
@@ -1460,7 +1396,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, mipoff0, s, t, r, offsets,
-                                             &colors0_lo, &colors0_hi);
+                                             &colors0);
       }
    }
    else {
@@ -1469,7 +1405,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                                        size0,
                                        row_stride0_vec, img_stride0_vec,
                                        data_ptr0, mipoff0, s, t, r, offsets,
-                                       &colors0_lo, &colors0_hi);
+                                       &colors0);
       }
       else {
          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
@@ -1477,13 +1413,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, mipoff0, s, t, r, offsets,
-                                      &colors0_lo, &colors0_hi);
+                                      &colors0);
       }
    }
 
    /* Store the first level's colors in the output variables */
-   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
-   LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+   LLVMBuildStore(builder, colors0, colors_var);
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
@@ -1522,9 +1457,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
       {
-         struct lp_build_context h16_bld;
+         struct lp_build_context u8n_bld;
 
-         lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
+         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
 
          /* sample the second mipmap level */
          lp_build_mipmap_level_sizes(bld, ilevel1,
@@ -1547,14 +1482,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                                                     size1,
                                                     row_stride1_vec, img_stride1_vec,
                                                     data_ptr1, mipoff1, s, t, r, offsets,
-                                                    &colors1_lo, &colors1_hi);
+                                                    &colors1);
             }
             else {
                lp_build_sample_image_linear_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, mipoff1, s, t, r, offsets,
-                                                   &colors1_lo, &colors1_hi);
+                                                   &colors1);
             }
          }
          else {
@@ -1563,73 +1498,55 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                                              size1,
                                              row_stride1_vec, img_stride1_vec,
                                              data_ptr1, mipoff1, s, t, r, offsets,
-                                             &colors1_lo, &colors1_hi);
+                                             &colors1);
             }
             else {
                lp_build_sample_image_linear(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, mipoff1, s, t, r, offsets,
-                                            &colors1_lo, &colors1_hi);
+                                            &colors1);
             }
          }
 
          /* interpolate samples from the two mipmap levels */
 
          if (num_quads == 1) {
-            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
-            lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
+            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
+            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
 
 #if HAVE_LLVM == 0x208
-            /* This is a work-around for a bug in LLVM 2.8.
+            /* This was a work-around for a bug in LLVM 2.8.
              * Evidently, something goes wrong in the construction of the
              * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
              * to force the vector to be properly constructed.
              * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
              */
-            {
-               LLVMValueRef shuffles[8], shuffle;
-               assert(h16_bld.type.length <= Elements(shuffles));
-               for (i = 0; i < h16_bld.type.length; i++)
-                  shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
-               shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
-               lod_fpart = LLVMBuildShuffleVector(builder,
-                                                  lod_fpart, lod_fpart,
-                                                  shuffle, "");
-            }
+#error Unsupported
 #endif
-
-            colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
-                                       colors0_lo, colors1_lo);
-            colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
-                                       colors0_hi, colors1_hi);
          }
          else {
-            LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16];
-            struct lp_type perquadi16_type = bld->perquadi_bld.type;
-            perquadi16_type.width /= 2;
-            perquadi16_type.length *= 2;
-            lod_fpart = LLVMBuildBitCast(builder, lod_fpart,
-                                         lp_build_vec_type(bld->gallivm,
-                                                           perquadi16_type), "");
-            /* XXX this only works for exactly 2 quads. More quads need shuffle */
-            assert(num_quads == 2);
-            for (i = 0; i < num_quads; i++) {
-               LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2);
-               lod_parts[i] = lp_build_extract_broadcast(bld->gallivm,
-                                                         perquadi16_type,
-                                                         h16_bld.type,
-                                                         lod_fpart,
-                                                         indexi2);
+            const unsigned num_chans_per_quad = 4 * 4;
+            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->perquadi_bld.type.length);
+            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
+
+            /* Take the LSB of lod_fpart */
+            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
+
+            /* Broadcast each lod weight into their respective channels */
+            assert(u8n_bld.type.length == num_quads * num_chans_per_quad);
+            for (i = 0; i < u8n_bld.type.length; ++i) {
+               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_quad);
             }
-            colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0],
-                                       colors0_lo, colors1_lo);
-            colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1],
-                                       colors0_hi, colors1_hi);
+            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
+                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
          }
 
-         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
-         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
+                                 colors0, colors1,
+                                 LP_BLD_LERP_PRESCALED_WEIGHTS);
+
+         LLVMBuildStore(builder, colors0, colors_var);
       }
       lp_build_endif(&if_ctx);
    }
@@ -1661,9 +1578,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
    const unsigned dims = bld->dims;
-   LLVMValueRef packed, packed_lo, packed_hi;
+   LLVMValueRef packed_var, packed;
    LLVMValueRef unswizzled[4];
-   struct lp_build_context h16_bld;
+   struct lp_build_context u8n_bld;
 
    /* we only support the common/simple wrap modes at this time */
    assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
@@ -1673,15 +1590,14 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
 
 
-   /* make 16-bit fixed-pt builder context */
-   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
+   /* make 8-bit unorm builder context */
+   lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
 
    /*
     * Get/interpolate texture colors.
     */
 
-   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
-   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
+   packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
 
    if (min_filter == mag_filter) {
       /* no need to distinguish between minification and magnification */
@@ -1689,7 +1605,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                              min_filter, mip_filter,
                              s, t, r, offsets,
                              ilevel0, ilevel1, lod_fpart,
-                             packed_lo, packed_hi);
+                             packed_var);
    }
    else {
       /* Emit conditional to choose min image filter or mag image filter
@@ -1722,7 +1638,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                                 min_filter, mip_filter,
                                 s, t, r, offsets,
                                 ilevel0, ilevel1, lod_fpart,
-                                packed_lo, packed_hi);
+                                packed_var);
       }
       lp_build_else(&if_ctx);
       {
@@ -1731,19 +1647,12 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                 s, t, r, offsets,
                                 ilevel0, NULL, NULL,
-                                packed_lo, packed_hi);
+                                packed_var);
       }
       lp_build_endif(&if_ctx);
    }
 
-   /*
-    * combine the values stored in 'packed_lo' and 'packed_hi' variables
-    * into 'packed'
-    */
-   packed = lp_build_pack2(bld->gallivm,
-                           h16_bld.type, lp_type_unorm(8, bld->vector_width),
-                           LLVMBuildLoad(builder, packed_lo, ""),
-                           LLVMBuildLoad(builder, packed_hi, ""));
+   packed = LLVMBuildLoad(builder, packed_var, "");
 
    /*
     * Convert to SoA and swizzle.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index beefdaed513..cc29c5c885f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -822,7 +822,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
       for (chan = 0; chan < 4; chan++) {
          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
                                           neighbors[0][0][chan],
-                                          neighbors[0][1][chan]);
+                                          neighbors[0][1][chan],
+                                          0);
       }
    }
    else {
@@ -848,7 +849,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                           neighbors[0][0][chan],
                                           neighbors[0][1][chan],
                                           neighbors[1][0][chan],
-                                          neighbors[1][1][chan]);
+                                          neighbors[1][1][chan],
+                                          0);
       }
 
       if (dims == 3) {
@@ -884,14 +886,16 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                              neighbors1[0][0][chan],
                                              neighbors1[0][1][chan],
                                              neighbors1[1][0][chan],
-                                             neighbors1[1][1][chan]);
+                                             neighbors1[1][1][chan],
+                                             0);
          }
 
          /* Linearly interpolate the two samples from the two 3D slices */
          for (chan = 0; chan < 4; chan++) {
             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
                                              r_fpart,
-                                             colors0[chan], colors1[chan]);
+                                             colors0[chan], colors1[chan],
+                                             0);
          }
       }
       else {
@@ -1038,7 +1042,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 
          for (chan = 0; chan < 4; chan++) {
             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
-                                          colors0[chan], colors1[chan]);
+                                          colors0[chan], colors1[chan],
+                                          0);
             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
          }
       }