1 files changed, 207 insertions, 95 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 7a64392d3c1..844d1d935b5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -47,8 +47,7 @@
 
 
 /*
- * Bri-linear factor. Use zero or any other number less than one to force
- * tri-linear filtering.
+ * Bri-linear factor. Should be greater than one.
  */
 #define BRILINEAR_FACTOR 2
 
@@ -201,8 +200,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
    LLVMValueRef float_size;
    LLVMValueRef rho;
 
-   dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
-   dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+   dsdx = ddx[0];
+   dsdy = ddy[0];
 
    if (dims <= 1) {
       rho_x = dsdx;
@@ -215,15 +214,15 @@ lp_build_rho(struct lp_build_sample_context *bld,
       rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, "");
       rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, "");
 
-      dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
-      dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+      dtdx = ddx[1];
+      dtdy = ddy[1];
 
       rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, "");
       rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, "");
 
       if (dims >= 3) {
-         drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
-         drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+         drdx = ddx[2];
+         drdy = ddy[2];
 
          rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, "");
          rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, "");
@@ -294,31 +293,30 @@ lp_build_rho(struct lp_build_sample_context *bld,
  * TODO: This could be done in fixed point, where applicable.
  */
 static void
-lp_build_brilinear_lod(struct lp_build_sample_context *bld,
+lp_build_brilinear_lod(struct lp_build_context *bld,
                        LLVMValueRef lod,
                        double factor,
                        LLVMValueRef *out_lod_ipart,
                        LLVMValueRef *out_lod_fpart)
 {
-   struct lp_build_context *float_bld = &bld->float_bld;
    LLVMValueRef lod_fpart;
-   float pre_offset = (factor - 0.5)/factor - 0.5;
-   float post_offset = 1 - factor;
+   double pre_offset = (factor - 0.5)/factor - 0.5;
+   double post_offset = 1 - factor;
 
    if (0) {
       lp_build_printf(bld->builder, "lod = %f\n", lod);
    }
 
-   lod = lp_build_add(float_bld, lod,
-                      lp_build_const_vec(float_bld->type, pre_offset));
+   lod = lp_build_add(bld, lod,
+                      lp_build_const_vec(bld->type, pre_offset));
 
-   lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, &lod_fpart);
+   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
 
-   lod_fpart = lp_build_mul(float_bld, lod_fpart,
-                            lp_build_const_vec(float_bld->type, factor));
+   lod_fpart = lp_build_mul(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, factor));
 
-   lod_fpart = lp_build_add(float_bld, lod_fpart,
-                            lp_build_const_vec(float_bld->type, post_offset));
+   lod_fpart = lp_build_add(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, post_offset));
 
    /*
     * It's not necessary to clamp lod_fpart since:
@@ -335,6 +333,61 @@ lp_build_brilinear_lod(struct lp_build_sample_context *bld,
 }
 
 
+/*
+ * Combined log2 and brilinear lod computation.
+ *
+ * The result is the same as calling lp_build_fast_log2() followed by
+ * lp_build_brilinear_lod() above, but by combining them we can compute the
+ * integer and fractional parts independently.
+ */
+static void
+lp_build_brilinear_rho(struct lp_build_context *bld,
+                       LLVMValueRef rho,
+                       double factor,
+                       LLVMValueRef *out_lod_ipart,
+                       LLVMValueRef *out_lod_fpart)
+{
+   LLVMValueRef lod_ipart;
+   LLVMValueRef lod_fpart;
+
+   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
+   const double post_offset = 1 - 2*factor;
+
+   assert(bld->type.floating);
+
+   assert(lp_check_value(bld->type, rho));
+
+   /*
+    * The pre factor will make the intersections with the exact powers of two
+    * happen precisely where we want them to be, which means that the integer
+    * part will not need any post adjustments.
+    */
+   rho = lp_build_mul(bld, rho,
+                      lp_build_const_vec(bld->type, pre_factor));
+
+   /* ipart = ifloor(log2(rho)) */
+   lod_ipart = lp_build_extract_exponent(bld, rho, 0);
+
+   /* fpart = rho / 2**ipart */
+   lod_fpart = lp_build_extract_mantissa(bld, rho);
+
+   lod_fpart = lp_build_mul(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, factor));
+
+   lod_fpart = lp_build_add(bld, lod_fpart,
+                            lp_build_const_vec(bld->type, post_offset));
+
+   /*
+    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
+    * - the above expression will never produce numbers greater than one.
+    * - the mip filtering branch is only taken if lod_fpart is positive
+    */
+
+   *out_lod_ipart = lod_ipart;
+   *out_lod_fpart = lod_fpart;
+}
+
+
 /**
  * Generate code to compute texture level of detail (lambda).
  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
@@ -389,16 +442,32 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 
          rho = lp_build_rho(bld, ddx, ddy);
 
-         /* compute lod = log2(rho) */
-         if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
-              mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
-             !lod_bias &&
+         /*
+          * Compute lod = log2(rho)
+          */
+
+         if (!lod_bias &&
              !bld->static_state->lod_bias_non_zero &&
              !bld->static_state->apply_max_lod &&
              !bld->static_state->apply_min_lod) {
-            *out_lod_ipart = lp_build_ilog2(float_bld, rho);
-            *out_lod_fpart = bld->float_bld.zero;
-            return;
+            /*
+             * Special case when there are no post-log2 adjustments, which
+             * saves instructions by keeping the integer and fractional lod
+             * computations separate from the start.
+             */
+
+            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+               *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+               *out_lod_fpart = bld->float_bld.zero;
+               return;
+            }
+            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
+                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+               lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
+                                      out_lod_ipart, out_lod_fpart);
+               return;
+            }
          }
 
          if (0) {
@@ -437,21 +506,22 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
    }
 
    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-      if (BRILINEAR_FACTOR > 1.0) {
-         lp_build_brilinear_lod(bld, lod, BRILINEAR_FACTOR,
+      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+         lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
                                 out_lod_ipart, out_lod_fpart);
       }
       else {
          lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
       }
 
-      lp_build_name(*out_lod_ipart, "lod_ipart");
       lp_build_name(*out_lod_fpart, "lod_fpart");
    }
    else {
       *out_lod_ipart = lp_build_iround(float_bld, lod);
    }
 
+   lp_build_name(*out_lod_ipart, "lod_ipart");
+
    return;
 }
 
@@ -630,37 +700,21 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 void
 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                             LLVMValueRef ilevel,
-                            LLVMValueRef *out_width_vec,
-                            LLVMValueRef *out_height_vec,
-                            LLVMValueRef *out_depth_vec,
+                            LLVMValueRef *out_size,
                             LLVMValueRef *row_stride_vec,
                             LLVMValueRef *img_stride_vec)
 {
    const unsigned dims = bld->dims;
    LLVMValueRef ilevel_vec;
-   LLVMValueRef size_vec;
-   LLVMTypeRef i32t = LLVMInt32Type();
 
    ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
 
    /*
     * Compute width, height, depth at mipmap level 'ilevel'
     */
-   size_vec = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+   *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
 
-   *out_width_vec = lp_build_extract_broadcast(bld->builder,
-                                               bld->int_size_type,
-                                               bld->int_coord_type,
-                                               size_vec,
-                                               LLVMConstInt(i32t, 0, 0));
    if (dims >= 2) {
-
-      *out_height_vec = lp_build_extract_broadcast(bld->builder,
-                                                   bld->int_size_type,
-                                                   bld->int_coord_type,
-                                                   size_vec,
-                                                   LLVMConstInt(i32t, 1, 0));
-
       *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                       bld->row_stride_array,
                                                       ilevel);
@@ -668,18 +722,90 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
          *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                          bld->img_stride_array,
                                                          ilevel);
-         if (dims == 3) {
-            *out_depth_vec = lp_build_extract_broadcast(bld->builder,
-                                                        bld->int_size_type,
-                                                        bld->int_coord_type,
-                                                        size_vec,
-                                                        LLVMConstInt(i32t, 2, 0));
-         }
       }
    }
 }
 
 
+/**
+ * Extract and broadcast texture size.
+ *
+ * @param size_type   type of the texture size vector (either
+ *                    bld->int_size_type or bld->float_size_type)
+ * @param coord_type  type of the broadcast output vectors (either
+ *                    bld->int_coord_type or bld->coord_type)
+ * @param size        vector with the texture size (width, height,
+ *                    depth), of type size_type
+ */
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+                             struct lp_type size_type,
+                             struct lp_type coord_type,
+                             LLVMValueRef size,
+                             LLVMValueRef *out_width,
+                             LLVMValueRef *out_height,
+                             LLVMValueRef *out_depth)
+{
+   const unsigned dims = bld->dims;
+   LLVMTypeRef i32t = LLVMInt32Type();
+
+   *out_width = lp_build_extract_broadcast(bld->builder,
+                                           size_type,
+                                           coord_type,
+                                           size,
+                                           LLVMConstInt(i32t, 0, 0));
+   if (dims >= 2) {
+      *out_height = lp_build_extract_broadcast(bld->builder,
+                                               size_type,
+                                               coord_type,
+                                               size,
+                                               LLVMConstInt(i32t, 1, 0));
+      if (dims == 3) {
+         *out_depth = lp_build_extract_broadcast(bld->builder,
+                                                 size_type,
+                                                 coord_type,
+                                                 size,
+                                                 LLVMConstInt(i32t, 2, 0));
+      }
+   }
+}
+
+
+/**
+ * Unnormalize coords.
+ *
+ * @param flt_size  vector with the float texture size (width, height, depth)
+ */
+void
+lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
+                             LLVMValueRef flt_size,
+                             LLVMValueRef *s,
+                             LLVMValueRef *t,
+                             LLVMValueRef *r)
+{
+   const unsigned dims = bld->dims;
+   LLVMValueRef width;
+   LLVMValueRef height;
+   LLVMValueRef depth;
+
+   lp_build_extract_image_sizes(bld,
+                                bld->float_size_type,
+                                bld->coord_type,
+                                flt_size,
+                                &width,
+                                &height,
+                                &depth);
+
+   /* s = s * width, t = t * height, r = r * depth */
+   *s = lp_build_mul(&bld->coord_bld, *s, width);
+   if (dims >= 2) {
+      *t = lp_build_mul(&bld->coord_bld, *t, height);
+      if (dims >= 3) {
+         *r = lp_build_mul(&bld->coord_bld, *r, depth);
+      }
+   }
+}
+
 
 /** Helper used by lp_build_cube_lookup() */
 static LLVMValueRef
@@ -798,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
    rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
 
    {
-      struct lp_build_flow_context *flow_ctx;
       struct lp_build_if_state if_ctx;
+      LLVMValueRef face_s_var;
+      LLVMValueRef face_t_var;
+      LLVMValueRef face_var;
 
-      flow_ctx = lp_build_flow_create(bld->builder);
-      lp_build_flow_scope_begin(flow_ctx);
-
-      *face_s = bld->coord_bld.undef;
-      *face_t = bld->coord_bld.undef;
-      *face = bld->int_bld.undef;
-
-      lp_build_name(*face_s, "face_s");
-      lp_build_name(*face_t, "face_t");
-      lp_build_name(*face, "face");
+      face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var");
+      face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var");
+      face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var");
 
-      lp_build_flow_scope_declare(flow_ctx, face_s);
-      lp_build_flow_scope_declare(flow_ctx, face_t);
-      lp_build_flow_scope_declare(flow_ctx, face);
-
-      lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+      lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz);
       {
          /* +/- X face */
          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
@@ -826,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
          *face = lp_build_cube_face(bld, rx,
                                     PIPE_TEX_FACE_POS_X,
                                     PIPE_TEX_FACE_NEG_X);
+         LLVMBuildStore(bld->builder, *face_s, face_s_var);
+         LLVMBuildStore(bld->builder, *face_t, face_t_var);
+         LLVMBuildStore(bld->builder, *face, face_var);
       }
       lp_build_else(&if_ctx);
       {
-         struct lp_build_flow_context *flow_ctx2;
          struct lp_build_if_state if_ctx2;
 
-         LLVMValueRef face_s2 = bld->coord_bld.undef;
-         LLVMValueRef face_t2 = bld->coord_bld.undef;
-         LLVMValueRef face2 = bld->int_bld.undef;
-
-         flow_ctx2 = lp_build_flow_create(bld->builder);
-         lp_build_flow_scope_begin(flow_ctx2);
-         lp_build_flow_scope_declare(flow_ctx2, &face_s2);
-         lp_build_flow_scope_declare(flow_ctx2, &face_t2);
-         lp_build_flow_scope_declare(flow_ctx2, &face2);
-
          ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
 
-         lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+         lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz);
          {
             /* +/- Y face */
             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
-            face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
-            face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
-            face2 = lp_build_cube_face(bld, ry,
+            *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+            *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+            *face = lp_build_cube_face(bld, ry,
                                        PIPE_TEX_FACE_POS_Y,
                                        PIPE_TEX_FACE_NEG_Y);
+            LLVMBuildStore(bld->builder, *face_s, face_s_var);
+            LLVMBuildStore(bld->builder, *face_t, face_t_var);
+            LLVMBuildStore(bld->builder, *face, face_var);
          }
          lp_build_else(&if_ctx2);
          {
             /* +/- Z face */
             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
-            face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
-            face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
-            face2 = lp_build_cube_face(bld, rz,
+            *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+            *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+            *face = lp_build_cube_face(bld, rz,
                                        PIPE_TEX_FACE_POS_Z,
                                        PIPE_TEX_FACE_NEG_Z);
+            LLVMBuildStore(bld->builder, *face_s, face_s_var);
+            LLVMBuildStore(bld->builder, *face_t, face_t_var);
+            LLVMBuildStore(bld->builder, *face, face_var);
          }
          lp_build_endif(&if_ctx2);
-         lp_build_flow_scope_end(flow_ctx2);
-         lp_build_flow_destroy(flow_ctx2);
-         *face_s = face_s2;
-         *face_t = face_t2;
-         *face = face2;
       }
 
       lp_build_endif(&if_ctx);
-      lp_build_flow_scope_end(flow_ctx);
-      lp_build_flow_destroy(flow_ctx);
+
+      *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s");
+      *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t");
+      *face   = LLVMBuildLoad(bld->builder, face_var, "face");
    }
 }