summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 13
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 288
2 files changed, 195 insertions, 106 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 98409c3be86..ee30a02d78c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1411,8 +1411,19 @@ lp_build_clamp(struct lp_build_context *bld,
assert(lp_check_value(bld->type, min));
assert(lp_check_value(bld->type, max));
- a = lp_build_min(bld, a, max);
+ /*
+ * XXX dark magic warning: The order of min/max here matters (!).
+ * The reason is a typical use case is clamp(a, 0.0, 1.0)
+ * (for example for float->unorm conversion) and on x86 sse2
+ * this will give 0.0 for NaNs, whereas doing min first will
+ * give 1.0 for NaN which makes d3d10 angry...
+ * This is very much not guaranteed behavior though; it just
+ * happens to work on x86 sse2 (and up), and obviously won't do anything
+ * for other non-zero clamps (say -1.0/1.0 in a SNORM conversion) either,
+ * so this needs to be fixed for real...
+ */
a = lp_build_max(bld, a, min);
+ a = lp_build_min(bld, a, max);
return a;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 15632bcf448..20a08cbb7a0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -682,6 +682,41 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/**
+ * Do shadow test/comparison. The result is used by callers as a 0/~0 mask.
+ * \param p shadow ref value (callers pass coords[4])
+ * \param texel the texel to compare against (callers pass channel 0)
+ */
+static LLVMValueRef
+lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
+ LLVMValueRef p,
+ LLVMValueRef texel)
+{
+ struct lp_build_context *texel_bld = &bld->texel_bld;
+ LLVMValueRef res;
+
+ if (0) { /* debug code, compiled out */
+ //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
+ lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
+ }
+
+ /* result = (p FUNC texel) ? ~0 : 0; callers turn the mask into a float result */
+ /*
+ * Honor d3d10 floating point rules here, which state that comparisons
+ * are ordered except NOT_EQUAL which is unordered.
+ */
+ if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
+ res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
+ p, texel);
+ }
+ else {
+ res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
+ p, texel);
+ }
+ return res;
+}
+
+
+/**
* Generate code to sample a mipmap level with nearest filtering.
* If sampling a cube texture, r = cube face in [0,5].
*/
@@ -760,8 +795,60 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
x, y, z,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, colors_out);
+
+ if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
+ LLVMValueRef cmpval;
+ cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
+ /* this is really just an AND of 1.0 and cmpval, but llvm is clever enough */
+ colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
+ bld->texel_bld.one, bld->texel_bld.zero);
+ colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
+ }
+
+}
+
+
+/**
+ * Lerp for 0/~0 mask inputs: (weight & mask1) + ((1 - weight) & mask0).
+ */
+static LLVMValueRef
+lp_build_masklerp(struct lp_build_context *bld,
+ LLVMValueRef weight,
+ LLVMValueRef mask0,
+ LLVMValueRef mask1)
+{
+ struct gallivm_state *gallivm = bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef weight2;
+ /* weight2 = 1 - weight; both weights are bitcast to int vectors for the AND */
+ weight2 = lp_build_sub(bld, bld->one, weight);
+ weight = LLVMBuildBitCast(builder, weight,
+ lp_build_int_vec_type(gallivm, bld->type), "");
+ weight2 = LLVMBuildBitCast(builder, weight2,
+ lp_build_int_vec_type(gallivm, bld->type), "");
+ weight = LLVMBuildAnd(builder, weight, mask1, ""); /* weight where mask1 is set, else 0 */
+ weight2 = LLVMBuildAnd(builder, weight2, mask0, ""); /* (1 - weight) where mask0 is set, else 0 */
+ weight = LLVMBuildBitCast(builder, weight, bld->vec_type, ""); /* back to bld's vector type */
+ weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
+ return lp_build_add(bld, weight, weight2);
}
+/**
+ * 2d lerp for 0/~0 mask inputs: masklerp each pair by weight0, then lerp by weight1.
+ */
+static LLVMValueRef
+lp_build_masklerp2d(struct lp_build_context *bld,
+ LLVMValueRef weight0,
+ LLVMValueRef weight1,
+ LLVMValueRef mask00,
+ LLVMValueRef mask01,
+ LLVMValueRef mask10,
+ LLVMValueRef mask11)
+{
+ LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01); /* first pair, by weight0 */
+ LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11); /* second pair, by weight0 */
+ return lp_build_lerp(bld, weight1, val0, val1, 0); /* regular lerp: val0/val1 are no longer masks */
+}
/**
* Generate code to sample a mipmap level with linear filtering.
@@ -861,12 +948,23 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
data_ptr, mipoffsets, neighbors[0][1]);
if (dims == 1) {
- /* Interpolate two samples from 1D image to produce one color */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
- neighbors[0][0][chan],
- neighbors[0][1][chan],
- 0);
+ if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+ /* Interpolate two samples from 1D image to produce one color */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ 0);
+ }
+ }
+ else {
+ LLVMValueRef cmpval0, cmpval1;
+ cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
+ cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
+ /* simplified lerp, AND mask with weight and add */
+ colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
+ cmpval0, cmpval1);
+ colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
}
}
else {
@@ -885,15 +983,27 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors[1][1]);
- /* Bilinear interpolate the four samples from the 2D image / 3D slice */
- for (chan = 0; chan < 4; chan++) {
- colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
- s_fpart, t_fpart,
- neighbors[0][0][chan],
- neighbors[0][1][chan],
- neighbors[1][0][chan],
- neighbors[1][1][chan],
- 0);
+ if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+ /* Bilinear interpolate the four samples from the 2D image / 3D slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan],
+ 0);
+ }
+ }
+ else {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
+ colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
+ cmpval00, cmpval01, cmpval10, cmpval11);
+ colors0[1] = colors0[2] = colors0[3] = colors0[0];
}
if (dims == 3) {
@@ -922,23 +1032,39 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
row_stride_vec, img_stride_vec,
data_ptr, mipoffsets, neighbors1[1][1]);
- /* Bilinear interpolate the four samples from the second Z slice */
- for (chan = 0; chan < 4; chan++) {
- colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
- s_fpart, t_fpart,
- neighbors1[0][0][chan],
- neighbors1[0][1][chan],
- neighbors1[1][0][chan],
- neighbors1[1][1][chan],
- 0);
+ if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+ /* Bilinear interpolate the four samples from the second Z slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors1[0][0][chan],
+ neighbors1[0][1][chan],
+ neighbors1[1][0][chan],
+ neighbors1[1][1][chan],
+ 0);
+ }
+ /* Linearly interpolate the two samples from the two 3D slices */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_lerp(&bld->texel_bld,
+ r_fpart,
+ colors0[chan], colors1[chan],
+ 0);
+ }
}
-
- /* Linearly interpolate the two samples from the two 3D slices */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = lp_build_lerp(&bld->texel_bld,
+ else {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; /* second Z slice: compare neighbors1, not neighbors */
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][1][0]);
+ colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
+ cmpval00, cmpval01, cmpval10, cmpval11);
+ /* Linearly interpolate the compare results of the two 3D slices */
+ colors_out[0] = lp_build_lerp(&bld->texel_bld,
r_fpart,
- colors0[chan], colors1[chan],
+ colors0[0], colors1[0],
0);
+ colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
}
}
else {
@@ -1173,6 +1299,31 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
}
+ if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
+ /*
+ * Clamp p coords to [0,1] for fixed function depth texture format here.
+ * Technically this is not entirely correct for unorm depth as the ref value
+ * should be converted to the depth format (quantization!) and comparison
+ * then done in texture format. This would actually help performance (since
+ * only need to do it once and could save the per-sample conversion of texels
+ * to floats instead), but it would need more messy code (would need to push
+ * at least some bits down to actual fetch so conversion could be skipped,
+ * and would have ugly interaction with border color, would need to convert
+ * border color to that format too or do some other tricks to make it work).
+ */
+ const struct util_format_description *format_desc;
+ unsigned chan_type;
+ format_desc = util_format_description(bld->static_texture_state->format);
+ /* not entirely sure we couldn't end up with non-valid swizzle here */
+ chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
+ format_desc->channel[format_desc->swizzle[0]].type :
+ UTIL_FORMAT_TYPE_FLOAT;
+ if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
+ coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
+ bld->coord_bld.zero, bld->coord_bld.one);
+ }
+ }
+
/*
* Compute the level of detail (float).
*/
@@ -1455,79 +1606,6 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
/**
- * Do shadow test/comparison.
- * \param coords incoming texcoords
- * \param texel the texel to compare against (use the X channel)
- * Ideally this should really be done per-sample.
- */
-static void
-lp_build_sample_compare(struct lp_build_sample_context *bld,
- LLVMValueRef p,
- LLVMValueRef texel[4])
-{
- struct lp_build_context *texel_bld = &bld->texel_bld;
- LLVMBuilderRef builder = bld->gallivm->builder;
- LLVMValueRef res;
- const unsigned chan = 0;
- unsigned chan_type;
- const struct util_format_description *format_desc;
-
- if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE)
- return;
-
- /* debug code */
- if (0) {
- LLVMValueRef indx = lp_build_const_int32(bld->gallivm, 0);
- LLVMValueRef coord = LLVMBuildExtractElement(builder, p, indx, "");
- LLVMValueRef tex = LLVMBuildExtractElement(builder, texel[chan], indx, "");
- lp_build_printf(bld->gallivm, "shadow compare coord %f to texture %f\n",
- coord, tex);
- }
-
- /* Clamp p coords to [0,1] for fixed function depth texture format */
- format_desc = util_format_description(bld->static_texture_state->format);
- /* not entirely sure we couldn't end up with non-valid swizzle here */
- chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
- format_desc->channel[format_desc->swizzle[0]].type :
- UTIL_FORMAT_TYPE_FLOAT;
- if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
- p = lp_build_clamp(&bld->coord_bld, p,
- bld->coord_bld.zero, bld->coord_bld.one);
- }
-
- /*
- * technically this is not entirely correct for unorm depth as the ref value
- * should be converted to the depth format (quantization!) and comparison
- * then done in texture format.
- */
-
- /* result = (p FUNC texel) ? 1 : 0 */
- /*
- * honor d3d10 floating point rules here, which state that comparisons
- * are ordered except NOT_EQUAL which is unordered.
- */
- if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
- res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
- p, texel[chan]);
- }
- else {
- res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
- p, texel[chan]);
- }
- res = lp_build_select(texel_bld, res, texel_bld->one, texel_bld->zero);
-
- /*
- * returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE.
- * This should be ok because sampler swizzle is applied on top of it.
- */
- texel[0] =
- texel[1] =
- texel[2] = res;
- texel[3] = texel_bld->one;
-}
-
-
-/**
* Just set texels to white instead of actually sampling the texture.
* For debugging.
*/
@@ -1749,7 +1827,9 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
lp_is_simple_wrap_mode(static_sampler_state->wrap_s) &&
- lp_is_simple_wrap_mode(static_sampler_state->wrap_t);
+ lp_is_simple_wrap_mode(static_sampler_state->wrap_t) &&
+ /* not sure this is strictly needed or simply impossible */
+ static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE;
if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
!use_aos && util_format_fits_8unorm(bld.format_desc)) {
@@ -1939,8 +2019,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
}
}
-
- lp_build_sample_compare(&bld, newcoords[4], texel_out);
}
if (target != PIPE_BUFFER) {