nir/lower_tex: add lowering for texture gradient on cube maps

This is ported from the Intel lowering pass that we use with GLSL IR. The NIR pass only handles cube maps, not shadow samplers, which are also lowered for gen < 8 on Intel hardware. We will add support for that in a later patch, at which point we should be able to remove the GLSL IR lowering pass. v2: - added a helper to retrieve ddx/ddy parameters (Ken) - No need to make size.z=1.0, we are only using component x anyway (Iago) v3: - Get rid of the ddx/ddy helper and use nir_tex_instr_src_index instead (Ken, Eric) v4: - When emitting the textureLod operation, copy all texture parameters from the original textureGrad() (except for ddx/ddy) using a loop - Add a 'continue' statement if the lowering makes progress because it replaces the original texture instruction Reviewed-by: Kenneth Graunke <[email protected]> (v3)
author: Iago Toral Quiroga <[email protected]> 2016-11-30 09:40:43 +0100
committer: Iago Toral Quiroga <[email protected]> 2016-12-13 10:32:00 +0100
commit: a8e740c3547b0209d04b505d95a79794db31ce0b (patch)
tree: 010a776eb5d4f28abc6ebd422bafa39d76e56a54 /src
parent: bac303c286de2a07912333a775bf4af18cd04e7a (diff)
2 files changed, 213 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3040cbd2c36..826410d5ac3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2394,6 +2394,11 @@ typedef struct nir_lower_tex_options {
     * of the texture are lowered to linear.
     */
    unsigned lower_srgb;
+
+   /**
+    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
+    */
+   bool lower_txd_cube_map;
 } nir_lower_tex_options;
 
 bool nir_lower_tex(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 08cb6681ace..11773cbee57 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -304,6 +304,207 @@ lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
                       nir_channel(b, xuxv, 3));
 }
 
+/*
+ * Emits a textureLod (txl) that replaces the textureGrad (txd) 'tex': all
+ * sources except ddx/ddy are copied, 'lod' is appended, 'tex' is removed.
+ */
+static void
+replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
+{
+   /* We are going to emit a textureLod() with the same parameters except that
+    * we replace ddx/ddy with lod (two sources dropped, one added).
+    */
+   int num_srcs = tex->num_srcs - 1;
+   nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);
+
+   txl->op = nir_texop_txl;
+   txl->sampler_dim = tex->sampler_dim;
+   txl->texture_index = tex->texture_index;
+   txl->dest_type = tex->dest_type;
+   txl->is_array = tex->is_array;
+   txl->is_shadow = tex->is_shadow;
+   txl->is_new_style_shadow = tex->is_new_style_shadow;
+   txl->sampler_index = tex->sampler_index;
+   txl->texture = (nir_deref_var *)
+      nir_copy_deref(txl, &tex->texture->deref);
+   txl->sampler = (nir_deref_var *)
+      nir_copy_deref(txl, &tex->sampler->deref);
+   txl->coord_components = tex->coord_components;
+
+   nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL);
+
+   int src_num = 0;
+   for (int i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type == nir_tex_src_ddx ||
+          tex->src[i].src_type == nir_tex_src_ddy)
+         continue;
+      nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl);
+      txl->src[src_num].src_type = tex->src[i].src_type;
+      src_num++;
+   }
+
+   txl->src[src_num].src = nir_src_for_ssa(lod);
+   txl->src[src_num].src_type = nir_tex_src_lod;
+   src_num++;
+
+   assert(src_num == num_srcs);
+
+   nir_builder_instr_insert(b, &txl->instr);
+
+   /* Point every user of the txd result at the txl result instead. */
+   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa));
+
+   nir_instr_remove(&tex->instr);
+}
+
+static void
+lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
+{
+   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
+   assert(tex->op == nir_texop_txd);
+   assert(tex->dest.is_ssa);
+
+   /* Use textureSize() to get the width and height of LOD 0 */
+   nir_ssa_def *size = get_texture_size(b, tex);
+
+   /* Lowers textureGrad() on a cube map to textureLod() by deriving an
+    * explicit LOD from the gradients. Cubemap lookups first generate a
+    * texture coordinate normalized to [-1, 1] on the appropriate face,
+    * selected by the component of largest magnitude and its sign. The
+    * coordinate is the quotient of the remaining components against the
+    * absolute value of that component, so the derivative of the texel
+    * coordinate must use the quotient rule. The high level GLSL code is as
+    * follows:
+    *
+    * Step 1: selection
+    *
+    * vec3 abs_p, Q, dQdx, dQdy;
+    * abs_p = abs(ir->coordinate);
+    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+    *    Q = ir->coordinate.yzx;
+    *    dQdx = ir->lod_info.grad.dPdx.yzx;
+    *    dQdy = ir->lod_info.grad.dPdy.yzx;
+    * }
+    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+    *    Q = ir->coordinate.xzy;
+    *    dQdx = ir->lod_info.grad.dPdx.xzy;
+    *    dQdy = ir->lod_info.grad.dPdy.xzy;
+    * }
+    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+    *    Q = ir->coordinate;
+    *    dQdx = ir->lod_info.grad.dPdx;
+    *    dQdy = ir->lod_info.grad.dPdy;
+    * }
+    *
+    * Step 2: use quotient rule to compute derivative. The normalized to
+    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
+    * only concerned with the magnitudes of the derivatives whose values are
+    * not affected by the sign. We drop the sign from the computation.
+    *
+    * vec2 dx, dy;
+    * float recip;
+    *
+    * recip = 1.0 / Q.z;
+    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+    *
+    * Step 3: compute LOD. At this point we have the derivatives of the
+    * texture coordinates normalized to [-1,1]. We take the LOD to be
+    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
+    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
+    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
+    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
+    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
+    * where L is the dimension of the cubemap. The code is:
+    *
+    * float M, result;
+    * M = max(dot(dx, dx), dot(dy, dy));
+    * L = textureSize(sampler, 0).x;
+    * result = -1.0 + 0.5 * log2(L * L * M);
+    */
+
+   /* coordinate */
+   nir_ssa_def *p =
+      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
+
+   /* unmodified dPdx, dPdy values */
+   nir_ssa_def *dPdx =
+      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
+   nir_ssa_def *dPdy =
+      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
+
+   nir_ssa_def *abs_p = nir_fabs(b, p);
+   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
+   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
+   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
+
+   /* 1. compute selector */
+   nir_ssa_def *Q, *dQdx, *dQdy;
+
+   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
+   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
+
+   unsigned yzx[4] = { 1, 2, 0, 0 };
+   unsigned xzy[4] = { 0, 2, 1, 0 };
+
+   Q = nir_bcsel(b, cond_z,
+                 p,
+                 nir_bcsel(b, cond_y,
+                           nir_swizzle(b, p, xzy, 3, false),
+                           nir_swizzle(b, p, yzx, 3, false)));
+
+   dQdx = nir_bcsel(b, cond_z,
+                    dPdx,
+                    nir_bcsel(b, cond_y,
+                              nir_swizzle(b, dPdx, xzy, 3, false),
+                              nir_swizzle(b, dPdx, yzx, 3, false)));
+
+   dQdy = nir_bcsel(b, cond_z,
+                    dPdy,
+                    nir_bcsel(b, cond_y,
+                              nir_swizzle(b, dPdy, xzy, 3, false),
+                              nir_swizzle(b, dPdy, yzx, 3, false)));
+
+   /* 2. quotient rule */
+
+   /* tmp = Q.xy * recip;
+    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+    */
+   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
+
+   unsigned xy[4] = { 0, 1, 0, 0 };
+   nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false);
+   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
+
+   nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false);
+   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
+   nir_ssa_def *dx =
+      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
+
+   nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false);
+   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
+   nir_ssa_def *dy =
+      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
+
+   /* M = max(dot(dx, dx), dot(dy, dy)); */
+   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
+
+   /* size has textureSize() of LOD 0 */
+   nir_ssa_def *L = nir_channel(b, size, 0);
+
+   /* lod = -1.0 + 0.5 * log2(L * L * M); */
+   nir_ssa_def *lod =
+      nir_fadd(b,
+               nir_imm_float(b, -1.0f),
+               nir_fmul(b,
+                        nir_imm_float(b, 0.5f),
+                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
+
+   /* 3. Replace the gradient instruction with an equivalent lod instruction */
+   replace_gradient_with_lod(b, lod, tex);
+}
+
 static void
 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
 {
@@ -529,6 +730,13 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
          linearize_srgb_result(b, tex);
          progress = true;
       }
+
+      if (tex->op == nir_texop_txd && options->lower_txd_cube_map &&
+          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+         lower_gradient_cube_map(b, tex);
+         progress = true;
+         continue;
+      }
    }
 
    return progress;
author	Iago Toral Quiroga <[email protected]>	2016-11-30 09:40:43 +0100
committer	Iago Toral Quiroga <[email protected]>	2016-12-13 10:32:00 +0100
commit	a8e740c3547b0209d04b505d95a79794db31ce0b (patch)
tree	010a776eb5d4f28abc6ebd422bafa39d76e56a54 /src
parent	bac303c286de2a07912333a775bf4af18cd04e7a (diff)