Diffstat (limited to 'src/gallium')
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample.c      | 253
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample.h      |   3
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c  |  35
3 files changed, 235 insertions(+), 56 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index dc593aabac4..39c3a2f9d9e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -273,7 +273,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
rho = lp_build_mul(rho_bld, cubesize, rho);
}
- else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
+ else if (derivs) {
LLVMValueRef ddmax[3], ddx[3], ddy[3];
for (i = 0; i < dims; i++) {
LLVMValueRef floatdim;
@@ -1481,6 +1481,21 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
}
+/** Helper for doing 3-wise selection.
+ * Returns sel1 ? val2 : (sel0 ? val0 : val1).
+ */
+static LLVMValueRef
+lp_build_select3(struct lp_build_context *sel_bld,
+ LLVMValueRef sel0,
+ LLVMValueRef sel1,
+ LLVMValueRef val0,
+ LLVMValueRef val1,
+ LLVMValueRef val2)
+{
+ LLVMValueRef tmp;
+ tmp = lp_build_select(sel_bld, sel0, val0, val1);
+ return lp_build_select(sel_bld, sel1, val2, tmp);
+}
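
For reference, the generated vector select is equivalent per lane to the following scalar sketch (select3_scalar is a made-up name for illustration; only lp_build_select3 itself is part of the patch):

/* Scalar analogue of lp_build_select3 (illustration only, not Mesa code):
 * per lane the patch computes sel1 ? val2 : (sel0 ? val0 : val1),
 * i.e. sel1 takes priority over sel0. */
static float
select3_scalar(int sel0, int sel1, float val0, float val1, float val2)
{
   float tmp = sel0 ? val0 : val1;   /* lp_build_select(sel_bld, sel0, val0, val1) */
   return sel1 ? val2 : tmp;         /* lp_build_select(sel_bld, sel1, val2, tmp) */
}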
/**
* Generate code to do cube face selection and compute per-face texcoords.
@@ -1488,8 +1503,9 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
LLVMValueRef *coords,
- const struct lp_derivatives *derivs, /* optional */
+ const struct lp_derivatives *derivs_in, /* optional */
LLVMValueRef *rho,
+ struct lp_derivatives *derivs_out, /* optional */
boolean need_derivs)
{
struct lp_build_context *coord_bld = &bld->coord_bld;
@@ -1512,19 +1528,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
* the edge). Still this is possibly a win over just selecting the same face
* for all pixels. Unfortunately, something like that doesn't work for
* explicit derivatives.
- * TODO: handle explicit derivatives by transforming them alongside coords
- * somehow.
*/
struct lp_build_context *cint_bld = &bld->int_coord_bld;
struct lp_type intctype = cint_bld->type;
LLVMTypeRef coord_vec_type = coord_bld->vec_type;
LLVMTypeRef cint_vec_type = cint_bld->vec_type;
- LLVMValueRef signs, signt, signr, signma;
LLVMValueRef as, at, ar, face, face_s, face_t;
LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
LLVMValueRef tnegi, rnegi;
- LLVMValueRef ma, mai, ima;
+ LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1 << (intctype.width - 1));
@@ -1563,7 +1576,166 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
maxasat = lp_build_max(coord_bld, as, at);
ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
- if (need_derivs) {
+ if (need_derivs && (derivs_in ||
+ ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
+ (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
+ /*
+ * XXX: This is really really complex.
+ * It is a bit overkill to use this for implicit derivatives as well,
+ * no way this is worth the cost in practice, but seems to be the
+ * only way for getting accurate and per-pixel lod values.
+ */
+ LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
+ LLVMValueRef madx, mady, madxdivma, madydivma;
+ LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
+ LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
+ LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
+ LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
+ LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
+ /*
+ * s = 1/2 * ( sc / ma + 1)
+ * t = 1/2 * ( tc / ma + 1)
+ *
+ * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
+ * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
+ *
+ * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
+ * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
+ * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
+ * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
+ */
+
+ /* select ma, calculate ima */
+ ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
+ mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
+ signmabit = LLVMBuildAnd(builder, mai, signmask, "");
+ ima = lp_build_div(coord_bld, coord_bld->one, ma);
+ imahalf = lp_build_mul(coord_bld, posHalf, ima);
+ imahalfpos = lp_build_abs(coord_bld, imahalf);
+
+ if (!derivs_in) {
+ ddx[0] = lp_build_ddx(coord_bld, s);
+ ddx[1] = lp_build_ddx(coord_bld, t);
+ ddx[2] = lp_build_ddx(coord_bld, r);
+ ddy[0] = lp_build_ddy(coord_bld, s);
+ ddy[1] = lp_build_ddy(coord_bld, t);
+ ddy[2] = lp_build_ddy(coord_bld, r);
+ }
+ else {
+ ddx[0] = derivs_in->ddx[0];
+ ddx[1] = derivs_in->ddx[1];
+ ddx[2] = derivs_in->ddx[2];
+ ddy[0] = derivs_in->ddy[0];
+ ddy[1] = derivs_in->ddy[1];
+ ddy[2] = derivs_in->ddy[2];
+ }
+
+ /* select major derivatives */
+ madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
+ mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
+
+ si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
+ ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
+ ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
+
+ sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
+ tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
+ rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
+
+ sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
+ tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
+ rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
+
+ /*
+ * compute all possible new s/t coords, which does the mirroring,
+ * and do the same for derivs minor axes.
+ * snewx = signma * -r;
+ * tnewx = -t;
+ * snewy = s;
+ * tnewy = signma * r;
+ * snewz = signma * s;
+ * tnewz = -t;
+ */
+ tnegi = LLVMBuildXor(builder, ti, signmask, "");
+ rnegi = LLVMBuildXor(builder, ri, signmask, "");
+ tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
+ rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
+ tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
+ rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
+
+ snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
+ tnewx = tnegi;
+ sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
+ tdxnewx = tdxnegi;
+ sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
+ tdynewx = tdynegi;
+
+ snewy = si;
+ tnewy = LLVMBuildXor(builder, signmabit, ri, "");
+ sdxnewy = sdxi;
+ tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
+ sdynewy = sdyi;
+ tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
+
+ snewz = LLVMBuildXor(builder, signmabit, si, "");
+ tnewz = tnegi;
+ sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
+ tdxnewz = tdxnegi;
+ sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
+ tdynewz = tdynegi;
+
+ /* select the mirrored values */
+ face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
+ face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
+ face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
+ face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
+ face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
+ face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
+ face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
+
+ face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
+ face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
+ face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
+ face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
+ face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
+ face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
+
+ /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
+ madxdivma = lp_build_mul(coord_bld, madx, ima);
+ tmp = lp_build_mul(coord_bld, madxdivma, face_s);
+ tmp = lp_build_sub(coord_bld, face_sdx, tmp);
+ derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
+
+ /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
+ tmp = lp_build_mul(coord_bld, madxdivma, face_t);
+ tmp = lp_build_sub(coord_bld, face_tdx, tmp);
+ derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
+
+ /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
+ madydivma = lp_build_mul(coord_bld, mady, ima);
+ tmp = lp_build_mul(coord_bld, madydivma, face_s);
+ tmp = lp_build_sub(coord_bld, face_sdy, tmp);
+ derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
+
+ /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
+ tmp = lp_build_mul(coord_bld, madydivma, face_t);
+ tmp = lp_build_sub(coord_bld, face_tdy, tmp);
+ derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
+
+ signma = LLVMBuildLShr(builder, mai, signshift, "");
+ coords[2] = LLVMBuildOr(builder, face, signma, "face");
+
+ /* project coords */
+ face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
+ face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
+
+ coords[0] = lp_build_add(coord_bld, face_s, posHalf);
+ coords[1] = lp_build_add(coord_bld, face_t, posHalf);
+
+ return;
+ }
+
+ else if (need_derivs) {
LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
static const unsigned char swizzle0[] = { /* no-op swizzle */
0, LP_BLD_SWIZZLE_DONTCARE,
@@ -1590,12 +1762,11 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
* scale the s/t/r coords pre-select/mirror so we can calculate
* "reasonable" derivs.
*/
- ma = lp_build_select(coord_bld, as_ge_at, s, t);
- ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
- ima = lp_build_cube_imapos(coord_bld, ma);
- s = lp_build_mul(coord_bld, s, ima);
- t = lp_build_mul(coord_bld, t, ima);
- r = lp_build_mul(coord_bld, r, ima);
+ ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
+ imahalfpos = lp_build_cube_imapos(coord_bld, ma);
+ s = lp_build_mul(coord_bld, s, imahalfpos);
+ t = lp_build_mul(coord_bld, t, imahalfpos);
+ r = lp_build_mul(coord_bld, r, imahalfpos);
/*
* This isn't quite the same as the "ordinary" (3d deriv) path since we
@@ -1625,56 +1796,41 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
*rho = lp_build_max(coord_bld, tmp[0], tmp[1]);
}
+ if (!need_derivs) {
+ ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
+ }
+ mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
+ signmabit = LLVMBuildAnd(builder, mai, signmask, "");
+
si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
- signs = LLVMBuildAnd(builder, si, signmask, "");
- signt = LLVMBuildAnd(builder, ti, signmask, "");
- signr = LLVMBuildAnd(builder, ri, signmask, "");
/*
- * compute all possible new s/t coords
- * snewx = signs * -r;
+ * compute all possible new s/t coords, which does the mirroring
+ * snewx = signma * -r;
* tnewx = -t;
* snewy = s;
- * tnewy = signt * r;
- * snewz = signr * s;
+ * tnewy = signma * r;
+ * snewz = signma * s;
* tnewz = -t;
*/
tnegi = LLVMBuildXor(builder, ti, signmask, "");
rnegi = LLVMBuildXor(builder, ri, signmask, "");
- snewx = LLVMBuildXor(builder, signs, rnegi, "");
+ snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
tnewx = tnegi;
snewy = si;
- tnewy = LLVMBuildXor(builder, signt, ri, "");
+ tnewy = LLVMBuildXor(builder, signmabit, ri, "");
- snewz = LLVMBuildXor(builder, signr, si, "");
+ snewz = LLVMBuildXor(builder, signmabit, si, "");
tnewz = tnegi;
- /* XXX on x86 unclear if we should cast the values back to float
- * or not - on some cpus (nehalem) pblendvb has twice the throughput
- * of blendvps though on others there just might be domain
- * transition penalties when using it (this depends on what llvm
- * will chose for the bit ops above so there appears no "right way",
- * but given the boatload of selects let's just use the int type).
- */
-
- /* select/mirror */
- if (!need_derivs) {
- ma = lp_build_select(coord_bld, as_ge_at, s, t);
- }
- face_s = lp_build_select(cint_bld, as_ge_at, snewx, snewy);
- face_t = lp_build_select(cint_bld, as_ge_at, tnewx, tnewy);
- face = lp_build_select(cint_bld, as_ge_at, facex, facey);
-
- if (!need_derivs) {
- ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
- }
- face_s = lp_build_select(cint_bld, ar_ge_as_at, snewz, face_s);
- face_t = lp_build_select(cint_bld, ar_ge_as_at, tnewz, face_t);
- face = lp_build_select(cint_bld, ar_ge_as_at, facez, face);
+ /* select the mirrored values */
+ face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
+ face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
+ face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
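
The snewx/tnewy/snewz expressions above (and their ddx/ddy counterparts in the derivative path) use the sign bit of ma directly: XOR-ing signmabit into another coordinate's bit pattern multiplies that coordinate by the sign of ma with no compare or select. A scalar sketch of the trick, assuming 32-bit IEEE floats (apply_sign_of is a hypothetical name, not a gallivm function):

#include <stdint.h>
#include <string.h>

/* Multiply v by the sign of ma by XOR-ing ma's sign bit into v's
 * bit pattern (illustration of the signmabit trick above). */
static float
apply_sign_of(float ma, float v)
{
   uint32_t mabits, vbits;
   const uint32_t signmask = 1u << 31;
   memcpy(&mabits, &ma, sizeof mabits);
   memcpy(&vbits, &v, sizeof vbits);
   vbits ^= mabits & signmask;   /* flip v's sign iff ma is negative */
   memcpy(&v, &vbits, sizeof v);
   return v;
}

/* e.g. snewx above corresponds to apply_sign_of(ma, -r),
 * tnewy to apply_sign_of(ma, r), and snewz to apply_sign_of(ma, s). */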
@@ -1684,15 +1840,14 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
* as long as we ensure vblendvps gets used we can actually
* skip the comparison and just use sign as a "mask" directly.
*/
- mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
signma = LLVMBuildLShr(builder, mai, signshift, "");
coords[2] = LLVMBuildOr(builder, face, signma, "face");
/* project coords */
if (!need_derivs) {
- ima = lp_build_cube_imapos(coord_bld, ma);
- face_s = lp_build_mul(coord_bld, face_s, ima);
- face_t = lp_build_mul(coord_bld, face_t, ima);
+ imahalfpos = lp_build_cube_imapos(coord_bld, ma);
+ face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
+ face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
}
coords[0] = lp_build_add(coord_bld, face_s, posHalf);
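
The per-face derivative math added to lp_build_cube_lookup follows the quotient rule spelled out in its comment: with s = 0.5 * (sc / ma + 1), one gets ds = 0.5 * (dsc - sc * dma / ma) / ma, and likewise for t and for the dy direction. A minimal scalar sketch of that step, assuming plain floats (cube_face_coord_deriv is an invented name, not part of the patch):

/* ds for s = 0.5 * (sc / ma + 1), given the face-major axis ma and the
 * derivatives dsc = d(sc), dma = d(ma) along x or y (illustration only). */
static float
cube_face_coord_deriv(float sc, float dsc, float ma, float dma)
{
   float ima     = 1.0f / ma;    /* like the 'ima' temp in the patch */
   float imahalf = 0.5f * ima;   /* like 'imahalf' */
   return (dsc - sc * dma * ima) * imahalf;
}

This is also why the patch keeps both imahalf (signed, used to scale the derivatives) and imahalfpos (its absolute value, used to project the coords themselves).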
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 803a99e3b0c..70f03503f0f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -457,8 +457,9 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
LLVMValueRef *coords,
- const struct lp_derivatives *derivs, /* optional */
+ const struct lp_derivatives *derivs_in, /* optional */
LLVMValueRef *rho,
+ struct lp_derivatives *derivs_out, /* optional */
boolean need_derivs);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 33378bcdcd0..54dee25bfd9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1387,6 +1387,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
const unsigned target = bld->static_texture_state->target;
LLVMValueRef first_level, cube_rho = NULL;
LLVMValueRef lod_ipart = NULL;
+ struct lp_derivatives cube_derivs;
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
@@ -1403,7 +1404,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
!bld->static_sampler_state->min_max_lod_equal &&
!explicit_lod);
- lp_build_cube_lookup(bld, coords, derivs, &cube_rho, need_derivs);
+ lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
+ derivs = &cube_derivs;
}
else if (target == PIPE_TEXTURE_1D_ARRAY ||
target == PIPE_TEXTURE_2D_ARRAY) {
@@ -2163,9 +2165,24 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
* avoided like min and max lod being equal.
*/
bld.num_mips = bld.num_lods = 1;
- if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
- (explicit_lod || lod_bias ||
- (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
+ (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+ (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ /*
+ * special case for using per-pixel lod even for implicit lod,
+ * which is generally never required (ok by APIs) except to please
+ * some (somewhat broken imho) tests (because per-pixel face selection
+ * can cause derivatives to be different for pixels outside the primitive
+ * due to the major axis division even if pre-project derivatives are
+ * looking normal).
+ */
+ bld.num_mips = type.length;
+ bld.num_lods = type.length;
+ }
+ else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
+ (explicit_lod || lod_bias || derivs)) {
if ((is_fetch && target != PIPE_BUFFER) ||
(!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
bld.num_mips = type.length;
@@ -2371,9 +2388,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld4.texel_type.length = 4;
bld4.num_mips = bld4.num_lods = 1;
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
+ (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+ (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld4.num_mips = type4.length;
+ bld4.num_lods = type4.length;
+ }
if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
- (explicit_lod || lod_bias ||
- (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+ (explicit_lod || lod_bias || derivs)) {
if ((is_fetch && target != PIPE_BUFFER) ||
(!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
bld4.num_mips = type4.length;