summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.c 126
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.h 13
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 20
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 141
4 files changed, 169 insertions, 131 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 89d72494be0..e1cfd78e885 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *coord_bld = &bld->coord_bld;
- struct lp_build_context *levelf_bld = &bld->levelf_bld;
+ struct lp_build_context *rho_bld = &bld->lodf_bld;
const unsigned dims = bld->dims;
LLVMValueRef ddx_ddy[2];
LLVMBuilderRef builder = bld->gallivm->builder;
@@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
LLVMValueRef first_level, first_level_vec;
unsigned length = coord_bld->type.length;
unsigned num_quads = length / 4;
- boolean rho_per_quad = levelf_bld->type.length != length;
+ boolean rho_per_quad = rho_bld->type.length != length;
unsigned i;
LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
LLVMValueRef rho_xvec, rho_yvec;
@@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld,
*/
if (rho_per_quad) {
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, cube_rho, 0);
+ rho_bld->type, cube_rho, 0);
}
else {
rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
}
if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
- rho = lp_build_sqrt(levelf_bld, rho);
+ rho = lp_build_sqrt(rho_bld, rho);
}
/* Could optimize this for single quad just skip the broadcast */
cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
- levelf_bld->type, float_size, index0);
- rho = lp_build_mul(levelf_bld, cubesize, rho);
+ rho_bld->type, float_size, index0);
+ rho = lp_build_mul(rho_bld, cubesize, rho);
}
else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
LLVMValueRef ddmax[3], ddx[3], ddy[3];
@@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld,
* otherwise would also need different code to per-pixel lod case.
*/
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
- rho = lp_build_sqrt(levelf_bld, rho);
+ rho = lp_build_sqrt(rho_bld, rho);
}
else {
@@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
* rho_vec contains per-pixel rho, convert to scalar per quad.
*/
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
}
}
@@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
if (rho_per_quad) {
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
else {
/*
@@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
*/
rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
}
- rho = lp_build_sqrt(levelf_bld, rho);
+ rho = lp_build_sqrt(rho_bld, rho);
}
else {
ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
@@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
}
if (rho_per_quad) {
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- levelf_bld->type, rho, 0);
+ rho_bld->type, rho, 0);
}
else {
rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
@@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
}
}
if (!rho_per_quad) {
- rho = lp_build_broadcast_scalar(levelf_bld, rho);
+ rho = lp_build_broadcast_scalar(rho_bld, rho);
}
}
}
@@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
* \param out_lod_fpart float part of lod (never larger than 1 but may be negative)
* \param out_lod_positive (mask) if lod is positive (i.e. texture is minified)
*
- * The resulting lod is scalar per quad, so only the first value per quad
- * passed in from lod_bias, explicit_lod is used.
+ * The resulting lod can be scalar per quad or be per element.
*/
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
@@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
{
LLVMBuilderRef builder = bld->gallivm->builder;
- struct lp_build_context *levelf_bld = &bld->levelf_bld;
+ struct lp_build_context *lodf_bld = &bld->lodf_bld;
LLVMValueRef lod;
- *out_lod_ipart = bld->leveli_bld.zero;
- *out_lod_positive = bld->leveli_bld.zero;
- *out_lod_fpart = levelf_bld->zero;
+ *out_lod_ipart = bld->lodi_bld.zero;
+ *out_lod_positive = bld->lodi_bld.zero;
+ *out_lod_fpart = lodf_bld->zero;
/*
* For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
@@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
bld->dynamic_state->min_lod(bld->dynamic_state,
bld->gallivm, sampler_unit);
- lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+ lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
}
else {
if (explicit_lod) {
if (bld->num_lods != bld->coord_type.length)
lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- levelf_bld->type, explicit_lod, 0);
+ lodf_bld->type, explicit_lod, 0);
else
lod = explicit_lod;
}
@@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
* Don't actually need both all the time, ipart is needed
* for nearest mipfilter, pos_or_zero if min != mag.
*/
- *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
- *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
- rho, levelf_bld->one);
+ *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
+ *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+ rho, lodf_bld->one);
return;
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
- lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
+ lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
- *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
- rho, levelf_bld->one);
+ *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+ rho, lodf_bld->one);
return;
}
}
if (0) {
- lod = lp_build_log2(levelf_bld, rho);
+ lod = lp_build_log2(lodf_bld, rho);
}
else {
- lod = lp_build_fast_log2(levelf_bld, rho);
+ lod = lp_build_fast_log2(lodf_bld, rho);
}
/* add shader lod bias */
if (lod_bias) {
if (bld->num_lods != bld->coord_type.length)
lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- levelf_bld->type, lod_bias, 0);
+ lodf_bld->type, lod_bias, 0);
lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
}
}
@@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef sampler_lod_bias =
bld->dynamic_state->lod_bias(bld->dynamic_state,
bld->gallivm, sampler_unit);
- sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
+ sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
sampler_lod_bias);
lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
}
@@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef max_lod =
bld->dynamic_state->max_lod(bld->dynamic_state,
bld->gallivm, sampler_unit);
- max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
+ max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
- lod = lp_build_min(levelf_bld, lod, max_lod);
+ lod = lp_build_min(lodf_bld, lod, max_lod);
}
if (bld->static_sampler_state->apply_min_lod) {
LLVMValueRef min_lod =
bld->dynamic_state->min_lod(bld->dynamic_state,
bld->gallivm, sampler_unit);
- min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+ min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
- lod = lp_build_max(levelf_bld, lod, min_lod);
+ lod = lp_build_max(lodf_bld, lod, min_lod);
}
}
- *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
- lod, levelf_bld->zero);
+ *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
+ lod, lodf_bld->zero);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
- lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
+ lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
}
else {
- lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
+ lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
}
lp_build_name(*out_lod_fpart, "lod_fpart");
}
else {
- *out_lod_ipart = lp_build_iround(levelf_bld, lod);
+ *out_lod_ipart = lp_build_iround(lodf_bld, lod);
}
lp_build_name(*out_lod_ipart, "lod_ipart");
@@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
out = lp_build_or(leveli_bld, out, out1);
- if (bld->num_lods == bld->coord_bld.type.length) {
+ if (bld->num_mips == bld->coord_bld.type.length) {
*out_of_bounds = out;
}
- else if (bld->num_lods == 1) {
+ else if (bld->num_mips == 1) {
*out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
}
else {
- assert(bld->num_lods == bld->coord_bld.type.length / 4);
+ assert(bld->num_mips == bld->coord_bld.type.length / 4);
*out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
leveli_bld->type,
bld->int_coord_bld.type,
@@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two (per-quad)
- * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
+ * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
+ * part accordingly.
* Later, we'll sample from those two mipmap levels and interpolate between them.
*/
void
@@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
LLVMValueRef clamp_min;
LLVMValueRef clamp_max;
+ assert(bld->num_lods == bld->num_mips);
+
first_level = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, texture_unit);
last_level = bld->dynamic_state->last_level(bld->dynamic_state,
@@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
LLVMValueRef indexes[2], offsets, offset1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
indexes[1] = level;
offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
offset1 = LLVMBuildLoad(builder, offset1, "");
offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
}
- else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ else if (bld->num_mips == bld->coord_bld.type.length / 4) {
unsigned i;
offsets = bld->int_coord_bld.undef;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
@@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
else {
unsigned i;
- assert (bld->num_lods == bld->coord_bld.type.length);
+ assert (bld->num_mips == bld->coord_bld.type.length);
offsets = bld->int_coord_bld.undef;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
@@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
LLVMBuilderRef builder = bld->gallivm->builder;
LLVMValueRef indexes[2], stride, stride1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
indexes[1] = level;
stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
}
- else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ else if (bld->num_mips == bld->coord_bld.type.length / 4) {
LLVMValueRef stride1;
unsigned i;
stride = bld->int_coord_bld.undef;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
@@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
LLVMValueRef stride1;
unsigned i;
- assert (bld->num_lods == bld->coord_bld.type.length);
+ assert (bld->num_mips == bld->coord_bld.type.length);
stride = bld->int_coord_bld.undef;
for (i = 0; i < bld->coord_bld.type.length; i++) {
@@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
/*
* Compute width, height, depth at mipmap level 'ilevel'
*/
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
*out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
}
@@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
unsigned num_quads = bld->coord_bld.type.length / 4;
unsigned i;
- if (bld->num_lods == num_quads) {
+ if (bld->num_mips == num_quads) {
/*
* XXX: this should be #ifndef SANE_INSTRUCTION_SET.
* intel "forgot" the variable shift count instruction until avx2.
@@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
* For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
* For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
*/
- assert(bld->num_lods == bld->coord_bld.type.length);
+ assert(bld->num_mips == bld->coord_bld.type.length);
if (bld->dims == 1) {
assert(bld->int_size_in_bld.type.length == 1);
int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
@@ -1226,7 +1228,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
}
else {
LLVMValueRef ilevel1;
- for (i = 0; i < bld->num_lods; i++) {
+ for (i = 0; i < bld->num_mips; i++) {
LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
bld->int_size_in_bld.type, ilevel, indexi);
@@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
}
*out_size = lp_build_concat(bld->gallivm, tmp,
bld->int_size_in_bld.type,
- bld->num_lods);
+ bld->num_mips);
}
}
}
@@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
struct lp_type size_type = size_bld->type;
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
*out_width = lp_build_extract_broadcast(bld->gallivm,
size_type,
coord_type,
@@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
if (dims == 1) {
*out_width = size;
}
- else if (bld->num_lods == num_quads) {
+ else if (bld->num_mips == num_quads) {
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
if (dims >= 2) {
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
@@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
}
}
else {
- assert(bld->num_lods == bld->coord_type.length);
+ assert(bld->num_mips == bld->coord_type.length);
*out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
coord_type, size, 0);
if (dims >= 2) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index a7ebe7e9ed8..e6b9f30d7bb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -233,7 +233,10 @@ struct lp_build_sample_context
/** SIMD vector width */
unsigned vector_width;
- /** number of lod values (valid are 1, length/4, length) */
+ /** number of mipmaps (valid are 1, length/4, length) */
+ unsigned num_mips;
+
+ /** number of lod values (valid are 1, length/4, length) */
unsigned num_lods;
/** regular scalar float type */
@@ -283,6 +286,14 @@ struct lp_build_sample_context
struct lp_type leveli_type;
struct lp_build_context leveli_bld;
+ /** Float lod type */
+ struct lp_type lodf_type;
+ struct lp_build_context lodf_bld;
+
+ /** Int lod type */
+ struct lp_type lodi_type;
+ struct lp_build_context lodi_bld;
+
/* Common dynamic state values */
LLVMValueRef row_stride_array;
LLVMValueRef img_stride_array;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 7431388812d..c35b628270e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel0,
&size0,
&row_stride0_vec, &img_stride0_vec);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
}
else {
@@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
- bld->levelf_bld.type, 256.0);
- LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
+ bld->lodf_bld.type, 256.0);
+ LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
struct lp_build_if_state if_ctx;
LLVMValueRef need_lerp;
unsigned num_quads = bld->coord_bld.type.length / 4;
@@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
/* need_lerp = lod_fpart > 0 */
if (bld->num_lods == 1) {
need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
- lod_fpart, bld->leveli_bld.zero,
+ lod_fpart, bld->lodi_bld.zero,
"need_lerp");
}
else {
@@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
* lod_fpart values have same sign.
* We can however then skip the greater than comparison.
*/
- lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
- bld->leveli_bld.zero);
- need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
+ lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
+ bld->lodi_bld.zero);
+ need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
}
lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel1,
&size1,
&row_stride1_vec, &img_stride1_vec);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
}
else {
@@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
}
else {
unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
- LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
+ LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
/* Take the LSB of lod_fpart */
@@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
* some max probably could hack up the weights in the linear
* path with selects to work for nearest.
*/
- if (bld->leveli_bld.type.length > 1)
+ if (bld->num_lods > 1)
lod_positive = LLVMBuildExtractElement(builder, lod_positive,
lp_build_const_int32(bld->gallivm, 0), "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 8ad3b9f246a..c686d82de57 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel0,
&size0,
&row_stride0_vec, &img_stride0_vec);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
}
else {
@@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
/* need_lerp = lod_fpart > 0 */
if (bld->num_lods == 1) {
need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
- lod_fpart, bld->levelf_bld.zero,
+ lod_fpart, bld->lodf_bld.zero,
"need_lerp");
}
else {
@@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
* negative values which would screw up filtering if not all
* lod_fpart values have same sign.
*/
- lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
- bld->levelf_bld.zero);
- need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
+ lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
+ bld->lodf_bld.zero);
+ need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
PIPE_FUNC_GREATER,
- lod_fpart, bld->levelf_bld.zero);
- need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
+ lod_fpart, bld->lodf_bld.zero);
+ need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
}
lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
lp_build_mipmap_level_sizes(bld, ilevel1,
&size1,
&row_stride1_vec, &img_stride1_vec);
- if (bld->num_lods == 1) {
+ if (bld->num_mips == 1) {
data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
}
else {
@@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
if (bld->num_lods != bld->coord_type.length)
lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
- bld->levelf_bld.type,
+ bld->lodf_bld.type,
bld->texel_bld.type,
lod_fpart);
@@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
mip_filter,
&lod_ipart, lod_fpart, lod_pos_or_zero);
} else {
- lod_ipart = bld->leveli_bld.zero;
- *lod_pos_or_zero = bld->leveli_bld.zero;
+ lod_ipart = bld->lodi_bld.zero;
+ *lod_pos_or_zero = bld->lodi_bld.zero;
+ }
+
+ if (bld->num_lods != bld->num_mips) {
+ /* only makes sense if there's just a single mip level */
+ assert(bld->num_mips == 1);
+ lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
}
/*
@@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
* some max probably could hack up the weights in the linear
* path with selects to work for nearest.
*/
- if (bld->leveli_bld.type.length > 1)
+ if (bld->num_lods > 1)
lod_positive = LLVMBuildExtractElement(builder, lod_positive,
lp_build_const_int32(bld->gallivm, 0), "");
@@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
const LLVMValueRef *offsets,
LLVMValueRef *colors_out)
{
- struct lp_build_context *perquadi_bld = &bld->leveli_bld;
+ struct lp_build_context *perquadi_bld = &bld->lodi_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
unsigned dims = bld->dims, chan;
unsigned target = bld->static_texture_state->target;
@@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
out_of_bounds = int_coord_bld->zero;
if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
- if (bld->num_lods != int_coord_bld->type.length) {
+ if (bld->num_mips != int_coord_bld->type.length) {
ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
perquadi_bld->type, explicit_lod, 0);
}
@@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
out_of_bound_ret_zero ? &out_of_bounds : NULL);
}
else {
- assert(bld->num_lods == 1);
+ assert(bld->num_mips == 1);
if (bld->static_texture_state->target != PIPE_BUFFER) {
ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, texture_unit);
@@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
unsigned target = static_texture_state->target;
unsigned dims = texture_dims(target);
unsigned num_quads = type.length / 4;
- unsigned mip_filter, i;
+ unsigned mip_filter, min_img_filter, mag_img_filter, i;
struct lp_build_sample_context bld;
struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
@@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
}
+ min_img_filter = static_sampler_state->min_img_filter;
+ mag_img_filter = static_sampler_state->mag_img_filter;
+
+
/*
* This is all a bit complicated different paths are chosen for performance
* reasons.
@@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
/*
* There are other situations where at least the multiple int lods could be
* avoided like min and max lod being equal.
- * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still
- * have length 4. Because lod_selector is always using per quad calcs in this
- * case, but minification etc. don't need to bother. This is very brittle though
- * e.g. num_lods might be 1 but still have multiple positive_lod values!
*/
+ bld.num_mips = bld.num_lods = 1;
if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
(explicit_lod || lod_bias ||
- (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
- ((is_fetch && target != PIPE_BUFFER) ||
- (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
- bld.num_lods = type.length;
+ (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+ if ((is_fetch && target != PIPE_BUFFER) ||
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld.num_mips = type.length;
+ bld.num_lods = type.length;
+ }
+ else if (!is_fetch && min_img_filter != mag_img_filter) {
+ bld.num_mips = 1;
+ bld.num_lods = type.length;
+ }
+ }
/* TODO: for true scalar_lod should only use 1 lod value */
- else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
+ else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
(!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld.num_mips = num_quads;
bld.num_lods = num_quads;
}
- else {
- bld.num_lods = 1;
+ else if (!is_fetch && min_img_filter != mag_img_filter) {
+ bld.num_mips = 1;
+ bld.num_lods = num_quads;
}
- bld.levelf_type = type;
+
+ bld.lodf_type = type;
/* we want native vector size to be able to use our intrinsics */
if (bld.num_lods != type.length) {
- bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+ /* TODO: this currently always has to be per-quad or per-element */
+ bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+ }
+ bld.lodi_type = lp_int_type(bld.lodf_type);
+ bld.levelf_type = bld.lodf_type;
+ if (bld.num_mips == 1) {
+ bld.levelf_type.length = 1;
}
bld.leveli_type = lp_int_type(bld.levelf_type);
bld.float_size_type = bld.float_size_in_type;
/* Note: size vectors may not be native. They contain minified w/h/d/_ values,
* with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
- if (bld.num_lods > 1) {
- bld.float_size_type.length = bld.num_lods == type.length ?
- bld.num_lods * bld.float_size_in_type.length :
+ if (bld.num_mips > 1) {
+ bld.float_size_type.length = bld.num_mips == type.length ?
+ bld.num_mips * bld.float_size_in_type.length :
type.length;
}
bld.int_size_type = lp_int_type(bld.float_size_type);
@@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
+ lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
+ lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
/* Get the dynamic state */
tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
@@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
* (It should be faster if we'd support avx2)
*/
if (num_quads == 1 || !use_aos) {
-
- if (num_quads > 1) {
- if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
- LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
- /*
- * This parameter is the same for all quads could probably simplify.
- */
- ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, "");
- }
- }
if (use_aos) {
/* do sampling/filtering with fixed pt arithmetic */
lp_build_sample_aos(&bld, sampler_index,
@@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
bld4.texel_type = bld.texel_type;
bld4.texel_type.length = 4;
- bld4.levelf_type = type4;
- /* we want native vector size to be able to use our intrinsics */
- bld4.levelf_type.length = 1;
- bld4.leveli_type = lp_int_type(bld4.levelf_type);
+ bld4.num_mips = bld4.num_lods = 1;
if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
- (explicit_lod || lod_bias ||
- (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
- ((is_fetch && target != PIPE_BUFFER) ||
- (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
- bld4.num_lods = type4.length;
- else
- bld4.num_lods = 1;
+ (explicit_lod || lod_bias ||
+ (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
+ if ((is_fetch && target != PIPE_BUFFER) ||
+ (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ bld4.num_mips = type4.length;
+ bld4.num_lods = type4.length;
+ }
+ else if (!is_fetch && min_img_filter != mag_img_filter) {
+ bld4.num_mips = 1;
+ bld4.num_lods = type4.length;
+ }
+ }
- bld4.levelf_type = type4;
/* we want native vector size to be able to use our intrinsics */
+ bld4.lodf_type = type4;
if (bld4.num_lods != type4.length) {
+ bld4.lodf_type.length = 1;
+ }
+ bld4.lodi_type = lp_int_type(bld4.lodf_type);
+ bld4.levelf_type = type4;
+ if (bld4.num_mips != type4.length) {
bld4.levelf_type.length = 1;
}
bld4.leveli_type = lp_int_type(bld4.levelf_type);
bld4.float_size_type = bld4.float_size_in_type;
- if (bld4.num_lods > 1) {
- bld4.float_size_type.length = bld4.num_lods == type4.length ?
- bld4.num_lods * bld4.float_size_in_type.length :
+ if (bld4.num_mips > 1) {
+ bld4.float_size_type.length = bld4.num_mips == type4.length ?
+ bld4.num_mips * bld4.float_size_in_type.length :
type4.length;
}
bld4.int_size_type = lp_int_type(bld4.float_size_type);
@@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
+ lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
+ lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
for (i = 0; i < num_quads; i++) {
LLVMValueRef s4, t4, r4;
@@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
}
}
lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
- ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
+ ilevel04 = bld.num_mips == 1 ? ilevel0 :
+ lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);