summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.c 62
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.h 3
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2
3 files changed, 54 insertions, 13 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index a032d9d6895..e60a035a882 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -36,6 +36,7 @@
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
@@ -248,7 +249,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
first_level = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, texture_unit);
first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
- int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
+ int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
float_size = lp_build_int_to_float(float_size_bld, int_size);
if (cube_rho) {
@@ -1089,12 +1090,14 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
/**
* Codegen equivalent for u_minify().
+ * @param lod_scalar TRUE if level is a (broadcasted) scalar, FALSE if it may differ per element
* Return max(1, base_size >> level);
*/
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
LLVMValueRef base_size,
- LLVMValueRef level)
+ LLVMValueRef level,
+ boolean lod_scalar)
{
LLVMBuilderRef builder = bld->gallivm->builder;
assert(lp_check_value(bld->type, base_size));
@@ -1105,10 +1108,49 @@ lp_build_minify(struct lp_build_context *bld,
return base_size;
}
else {
- LLVMValueRef size =
- LLVMBuildLShr(builder, base_size, level, "minify");
+ LLVMValueRef size;
assert(bld->type.sign);
- size = lp_build_max(bld, size, bld->one);
+ if (lod_scalar ||
+ (util_cpu_caps.has_avx2 || !util_cpu_caps.has_sse)) {
+ size = LLVMBuildLShr(builder, base_size, level, "minify");
+ size = lp_build_max(bld, size, bld->one);
+ }
+ else {
+ /*
+ * Emulate the shift with a float multiply: x86 has no vector shift
+ * with a per-element shift count before AVX2, so a plain LShr ends up
+ * as slow scalar extraction (of both count and value), scalar shift
+ * and vector reinsertion. This should not be an issue on any non-x86
+ * cpu with a vector instruction set.
+ * On cpus with AMD's XOP this should also be unnecessary, but it is
+ * unclear whether llvm would emit such shifts with current flags.
+ */
+ LLVMValueRef const127, const23, lf;
+ struct lp_type ftype;
+ struct lp_build_context fbld;
+ ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
+ lp_build_context_init(&fbld, bld->gallivm, ftype);
+ const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
+ const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);
+
+ /* build 2^(-level) as a float: biased exponent (127 - level) shifted into place */
+ lf = lp_build_sub(bld, const127, level);
+ lf = lp_build_shl(bld, lf, const23);
+ lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");
+
+ /* finish shift operation by doing float mul */
+ base_size = lp_build_int_to_float(&fbld, base_size);
+ size = lp_build_mul(&fbld, base_size, lf);
+ /*
+ * do the max also with floats because
+ * a) non-emulated int max requires sse41
+ * (this is actually a lie as we could cast to 16bit values
+ * as 16bit is sufficient and 16bit int max is sse2)
+ * b) with avx we can do int max 4-wide but float max 8-wide
+ */
+ size = lp_build_max(&fbld, size, fbld.one);
+ size = lp_build_itrunc(&fbld, size);
+ }
return size;
}
}
@@ -1185,7 +1227,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
*/
if (bld->num_mips == 1) {
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
- *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+ *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE);
}
else {
LLVMValueRef int_size_vec;
@@ -1229,7 +1271,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
bld4.type,
ilevel,
indexi);
- tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli);
+ tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, TRUE);
}
/*
* out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
@@ -1248,7 +1290,6 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
* with 4-wide vector pack all elements into a 8xi16 vector
* (on which we can still do useful math) instead of using a 16xi32
* vector.
- * FIXME: some callers can't handle this yet.
* For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
* For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
*/
@@ -1257,8 +1298,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
assert(bld->int_size_in_bld.type.length == 1);
int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
bld->int_size);
- /* vector shift with variable shift count alert... */
- *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel);
+ *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE);
}
else {
LLVMValueRef ilevel1;
@@ -1267,7 +1307,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
bld->int_size_in_bld.type, ilevel, indexi);
tmp[i] = bld->int_size;
- tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
+ tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE);
}
*out_size = lp_build_concat(bld->gallivm, tmp,
bld->int_size_in_bld.type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 5039128a203..fd4e0532607 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -547,7 +547,8 @@ lp_build_sample_nop(struct gallivm_state *gallivm,
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
LLVMValueRef base_size,
- LLVMValueRef level);
+ LLVMValueRef level,
+ boolean lod_scalar);
#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 2d833318aee..e8c04d1e6c5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2940,7 +2940,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
lp_build_const_int32(gallivm, 2), "");
}
- size = lp_build_minify(&bld_int_vec4, size, lod);
+ size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
if (has_array)
size = LLVMBuildInsertElement(gallivm->builder, size,