summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosé Fonseca <[email protected]>2010-10-06 18:44:51 +0100
committerJosé Fonseca <[email protected]>2010-10-06 18:51:25 +0100
commit87dd859b342b844add906358810445da21b6b092 (patch)
treef36dd3bf9f2213917bc9764b5b1aaf24b1108065
parent1c32583581ef5aee59901d9dd8e56cc1a125f0d4 (diff)
gallivm: Compute lod as integer whenever possible.
More accurate/faster results for PIPE_TEX_MIPFILTER_NEAREST. Less FP <-> SI conversion overall.
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c170
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h12
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c40
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c31
4 files changed, 158 insertions, 95 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index c1c98bf8596..3287cf7c376 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -168,6 +168,73 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Generate code to compute coordinate gradient (rho).
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ * \param width scalar int texture width
+ * \param height scalar int texture height
+ * \param depth scalar int texture depth
+ *
+ * XXX: The resulting rho is scalar, so we ignore all but the first element of
+ * derivatives that are passed by the shader.
+ */
+static LLVMValueRef
+lp_build_rho(struct lp_build_sample_context *bld,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef dsdx, dsdy;
+ LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
+ LLVMValueRef rho;
+
+ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
+
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
+ max = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
+ rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
+ }
+
+ return rho;
+}
+
+
+/**
* Generate code to compute texture level of detail (lambda).
* \param ddx partial derivatives of (s, t, r, q) with respect to X
* \param ddy partial derivatives of (s, t, r, q) with respect to Y
@@ -180,7 +247,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
* XXX: The resulting lod is scalar, so ignore all but the first element of
* derivatives, lod_bias, etc that are passed by the shader.
*/
-LLVMValueRef
+void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
@@ -189,9 +256,18 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
- LLVMValueRef depth)
+ LLVMValueRef depth,
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod;
+
+ *out_lod_ipart = bld->int_bld.zero;
+ *out_lod_fpart = bld->float_bld.zero;
+
if (bld->static_state->min_max_lod_equal) {
/* User is forcing sampling from a particular mipmap level.
* This is hit during mipmap generation.
@@ -199,68 +275,40 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef min_lod =
bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
- return min_lod;
+ lod = min_lod;
}
else {
- struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef sampler_lod_bias =
bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef lod;
if (explicit_lod) {
lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
index0, "");
}
else {
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef dsdx, dsdy;
- LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;
- dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
- drdy = lp_build_abs(float_bld, drdy);
- }
- }
+ rho = lp_build_rho(bld, ddx, ddy, width, height, depth);
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
- */
- rho = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
- rho = lp_build_max(float_bld, rho, max);
- }
+ /* compute lod = log2(rho) */
+ if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+ mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
+ !lod_bias &&
+ !bld->static_state->lod_bias_non_zero &&
+ !bld->static_state->apply_max_lod &&
+ !bld->static_state->apply_min_lod) {
+ *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+ *out_lod_fpart = bld->float_bld.zero;
+ return;
}
- /* compute lod = log2(rho) */
-#if 0
- lod = lp_build_log2(float_bld, rho);
-#else
- lod = lp_build_fast_log2(float_bld, rho);
-#endif
+ if (0) {
+ lod = lp_build_log2(float_bld, rho);
+ }
+ else {
+ lod = lp_build_fast_log2(float_bld, rho);
+ }
/* add shader lod bias */
if (lod_bias) {
@@ -288,9 +336,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
lod = lp_build_max(float_bld, lod, min_lod);
}
+ }
- return lod;
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ LLVMValueRef ipart = lp_build_ifloor(float_bld, lod);
+ lp_build_name(ipart, "lod_ipart");
+ *out_lod_ipart = ipart;
+ ipart = LLVMBuildSIToFP(bld->builder, ipart, float_bld->vec_type, "");
+ *out_lod_fpart = LLVMBuildFSub(bld->builder, lod, ipart, "lod_fpart");
}
+ else {
+ *out_lod_ipart = lp_build_iround(float_bld, lod);
+ }
+
+ return;
}
@@ -304,10 +363,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
LLVMValueRef *level_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;
@@ -317,7 +375,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
bld->builder, unit);
/* convert float lod to integer */
- level = lp_build_iround(float_bld, lod);
+ level = lod_ipart;
/* clamp level to legal range of levels */
*level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -332,12 +390,10 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
+ LLVMValueRef *level1_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;
@@ -345,7 +401,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
bld->builder, unit);
/* convert float lod to integer */
- lp_build_ifloor_fract(float_bld, lod, &level, weight_out);
+ level = lod_ipart;
/* compute level 0 and clamp to legal range of levels */
*level0_out = lp_build_clamp(int_bld, level,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index bb83ede9314..b019c3fa5e7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -274,7 +274,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
-LLVMValueRef
+void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned unit,
const LLVMValueRef ddx[4],
@@ -283,7 +283,10 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
- LLVMValueRef depth);
+ LLVMValueRef depth,
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart);
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -294,10 +297,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out);
+ LLVMValueRef *level1_out);
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 49a6eed615f..8a55681166d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -882,13 +882,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
LLVMValueRef data_array,
LLVMValueRef texel_out[4])
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -936,7 +936,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
ddy = face_ddy;
}
-
/*
* Compute the level of detail (float).
*/
@@ -945,9 +944,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, unit, ddx, ddy,
- lod_bias, explicit_lod,
- width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ width, height, depth,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
}
/*
@@ -966,30 +969,29 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
- lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
}
break;
case PIPE_TEX_MIPFILTER_NEAREST:
- assert(lod);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
break;
case PIPE_TEX_MIPFILTER_LINEAR:
{
LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
- LLVMValueRef i255 = lp_build_const_int32(255);
+ LLVMTypeRef i32_type = LLVMIntType(32);
LLVMTypeRef i16_type = LLVMIntType(16);
- assert(lod);
+ assert(lod_fpart);
+
+ lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
- lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
- lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
+ lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "");
lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
@@ -1049,9 +1051,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
lp_build_flow_scope_declare(flow_ctx, &packed_lo);
lp_build_flow_scope_declare(flow_ctx, &packed_hi);
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(builder, LLVMRealUGE,
- lod, float_bld->zero, "");
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
lp_build_if(&if_ctx, flow_ctx, builder, minify);
{
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index d464147371d..4f9bf6763e6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -884,12 +884,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef data_array,
LLVMValueRef *colors_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -935,9 +935,13 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, unit, ddx, ddy,
- lod_bias, explicit_lod,
- width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ width, height, depth,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
}
/*
@@ -950,22 +954,21 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
- lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
}
}
else {
- assert(lod);
+ assert(lod_ipart);
if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
+ lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);
lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
}
}
@@ -1019,9 +1022,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- lod, float_bld->zero, "");
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(bld->builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
{