diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 126 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.h | 13 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 20 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 141 |
4 files changed, 169 insertions, 131 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 89d72494be0..e1cfd78e885 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld, struct lp_build_context *float_size_bld = &bld->float_size_in_bld; struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *levelf_bld = &bld->levelf_bld; + struct lp_build_context *rho_bld = &bld->lodf_bld; const unsigned dims = bld->dims; LLVMValueRef ddx_ddy[2]; LLVMBuilderRef builder = bld->gallivm->builder; @@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef first_level, first_level_vec; unsigned length = coord_bld->type.length; unsigned num_quads = length / 4; - boolean rho_per_quad = levelf_bld->type.length != length; + boolean rho_per_quad = rho_bld->type.length != length; unsigned i; LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); LLVMValueRef rho_xvec, rho_yvec; @@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld, */ if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - levelf_bld->type, cube_rho, 0); + rho_bld->type, cube_rho, 0); } else { rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4); } if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) { - rho = lp_build_sqrt(levelf_bld, rho); + rho = lp_build_sqrt(rho_bld, rho); } /* Could optimize this for single quad just skip the broadcast */ cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type, - levelf_bld->type, float_size, index0); - rho = lp_build_mul(levelf_bld, cubesize, rho); + rho_bld->type, float_size, index0); + rho = lp_build_mul(rho_bld, cubesize, rho); } else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) { LLVMValueRef ddmax[3], ddx[3], ddy[3]; @@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld, * otherwise would also need different code to per-pixel lod case. */ rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - levelf_bld->type, rho, 0); + rho_bld->type, rho, 0); } - rho = lp_build_sqrt(levelf_bld, rho); + rho = lp_build_sqrt(rho_bld, rho); } else { @@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld, * rho_vec contains per-pixel rho, convert to scalar per quad. */ rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - levelf_bld->type, rho, 0); + rho_bld->type, rho, 0); } } } @@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld, if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - levelf_bld->type, rho, 0); + rho_bld->type, rho, 0); } else { /* @@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld, */ rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4); } - rho = lp_build_sqrt(levelf_bld, rho); + rho = lp_build_sqrt(rho_bld, rho); } else { ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); @@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld, } if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - levelf_bld->type, rho, 0); + rho_bld->type, rho, 0); } else { rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4); @@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld, } } if (!rho_per_quad) { - rho = lp_build_broadcast_scalar(levelf_bld, rho); + rho = lp_build_broadcast_scalar(rho_bld, rho); } } } @@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld, * \param out_lod_fpart float part of lod (never larger than 1 but may be negative) * \param out_lod_positive (mask) if lod is positive (i.e. texture is minified) * - * The resulting lod is scalar per quad, so only the first value per quad - * passed in from lod_bias, explicit_lod is used. + * The resulting lod can be scalar per quad or be per element. */ void lp_build_lod_selector(struct lp_build_sample_context *bld, @@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, { LLVMBuilderRef builder = bld->gallivm->builder; - struct lp_build_context *levelf_bld = &bld->levelf_bld; + struct lp_build_context *lodf_bld = &bld->lodf_bld; LLVMValueRef lod; - *out_lod_ipart = bld->leveli_bld.zero; - *out_lod_positive = bld->leveli_bld.zero; - *out_lod_fpart = levelf_bld->zero; + *out_lod_ipart = bld->lodi_bld.zero; + *out_lod_positive = bld->lodi_bld.zero; + *out_lod_fpart = lodf_bld->zero; /* * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification: @@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, sampler_unit); - lod = lp_build_broadcast_scalar(levelf_bld, min_lod); + lod = lp_build_broadcast_scalar(lodf_bld, min_lod); } else { if (explicit_lod) { if (bld->num_lods != bld->coord_type.length) lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, - levelf_bld->type, explicit_lod, 0); + lodf_bld->type, explicit_lod, 0); else lod = explicit_lod; } @@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, * Don't actually need both all the time, ipart is needed * for nearest mipfilter, pos_or_zero if min != mag. */ - *out_lod_ipart = lp_build_ilog2(levelf_bld, rho); - *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER, - rho, levelf_bld->one); + *out_lod_ipart = lp_build_ilog2(lodf_bld, rho); + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER, + rho, lodf_bld->one); return; } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { - lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR, + lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); - *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER, - rho, levelf_bld->one); + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER, + rho, lodf_bld->one); return; } } if (0) { - lod = lp_build_log2(levelf_bld, rho); + lod = lp_build_log2(lodf_bld, rho); } else { - lod = lp_build_fast_log2(levelf_bld, rho); + lod = lp_build_fast_log2(lodf_bld, rho); } /* add shader lod bias */ if (lod_bias) { if (bld->num_lods != bld->coord_type.length) lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, - levelf_bld->type, lod_bias, 0); + lodf_bld->type, lod_bias, 0); lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias"); } } @@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef sampler_lod_bias = bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, sampler_unit); - sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld, + sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld, sampler_lod_bias); lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias"); } @@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef max_lod = bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, sampler_unit); - max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod); + max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod); - lod = lp_build_min(levelf_bld, lod, max_lod); + lod = lp_build_min(lodf_bld, lod, max_lod); } if (bld->static_sampler_state->apply_min_lod) { LLVMValueRef min_lod = bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, sampler_unit); - min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod); + min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod); - lod = lp_build_max(levelf_bld, lod, min_lod); + lod = lp_build_max(lodf_bld, lod, min_lod); } } - *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER, - lod, levelf_bld->zero); + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER, + lod, lodf_bld->zero); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { - lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR, + lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); } else { - lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart); + lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart); } lp_build_name(*out_lod_fpart, "lod_fpart"); } else { - *out_lod_ipart = lp_build_iround(levelf_bld, lod); + *out_lod_ipart = lp_build_iround(lodf_bld, lod); } lp_build_name(*out_lod_ipart, "lod_ipart"); @@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level); out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level); out = lp_build_or(leveli_bld, out, out1); - if (bld->num_lods == bld->coord_bld.type.length) { + if (bld->num_mips == bld->coord_bld.type.length) { *out_of_bounds = out; } - else if (bld->num_lods == 1) { + else if (bld->num_mips == 1) { *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out); } else { - assert(bld->num_lods == bld->coord_bld.type.length / 4); + assert(bld->num_mips == bld->coord_bld.type.length / 4); *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, leveli_bld->type, bld->int_coord_bld.type, @@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, /** - * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two (per-quad) - * (adjacent) mipmap level indexes, and fix up float lod part accordingly. + * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s) + * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod + * part accordingly. * Later, we'll sample from those two mipmap levels and interpolate between them. */ void @@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, LLVMValueRef clamp_min; LLVMValueRef clamp_max; + assert(bld->num_lods == bld->num_mips); + first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, texture_unit); last_level = bld->dynamic_state->last_level(bld->dynamic_state, @@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld, LLVMValueRef indexes[2], offsets, offset1; indexes[0] = lp_build_const_int32(bld->gallivm, 0); - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { indexes[1] = level; offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, ""); offset1 = LLVMBuildLoad(builder, offset1, ""); offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1); } - else if (bld->num_lods == bld->coord_bld.type.length / 4) { + else if (bld->num_mips == bld->coord_bld.type.length / 4) { unsigned i; offsets = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_lods; i++) { + for (i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); @@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld, else { unsigned i; - assert (bld->num_lods == bld->coord_bld.type.length); + assert (bld->num_mips == bld->coord_bld.type.length); offsets = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_lods; i++) { + for (i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, ""); @@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef indexes[2], stride, stride1; indexes[0] = lp_build_const_int32(bld->gallivm, 0); - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { indexes[1] = level; stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); stride1 = LLVMBuildLoad(builder, stride1, ""); stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1); } - else if (bld->num_lods == bld->coord_bld.type.length / 4) { + else if (bld->num_mips == bld->coord_bld.type.length / 4) { LLVMValueRef stride1; unsigned i; stride = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_lods; i++) { + for (i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); @@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, LLVMValueRef stride1; unsigned i; - assert (bld->num_lods == bld->coord_bld.type.length); + assert (bld->num_mips == bld->coord_bld.type.length); stride = bld->int_coord_bld.undef; for (i = 0; i < bld->coord_bld.type.length; i++) { @@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, /* * Compute width, height, depth at mipmap level 'ilevel' */ - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); } @@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, unsigned num_quads = bld->coord_bld.type.length / 4; unsigned i; - if (bld->num_lods == num_quads) { + if (bld->num_mips == num_quads) { /* * XXX: this should be #ifndef SANE_INSTRUCTION_SET. * intel "forgot" the variable shift count instruction until avx2. @@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, * For dims == 1 this will create [w0, w1, w2, w3, ...] vector. * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector. */ - assert(bld->num_lods == bld->coord_bld.type.length); + assert(bld->num_mips == bld->coord_bld.type.length); if (bld->dims == 1) { assert(bld->int_size_in_bld.type.length == 1); int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, @@ -1226,7 +1228,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, } else { LLVMValueRef ilevel1; - for (i = 0; i < bld->num_lods; i++) { + for (i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type, bld->int_size_in_bld.type, ilevel, indexi); @@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, } *out_size = lp_build_concat(bld->gallivm, tmp, bld->int_size_in_bld.type, - bld->num_lods); + bld->num_mips); } } } @@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); struct lp_type size_type = size_bld->type; - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { *out_width = lp_build_extract_broadcast(bld->gallivm, size_type, coord_type, @@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, if (dims == 1) { *out_width = size; } - else if (bld->num_lods == num_quads) { + else if (bld->num_mips == num_quads) { *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4); if (dims >= 2) { *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4); @@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, } } else { - assert(bld->num_lods == bld->coord_type.length); + assert(bld->num_mips == bld->coord_type.length); *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type, coord_type, size, 0); if (dims >= 2) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index a7ebe7e9ed8..e6b9f30d7bb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -233,7 +233,10 @@ struct lp_build_sample_context /** SIMD vector width */ unsigned vector_width; - /** number of lod values (valid are 1, length/4, length) */ + /** number of mipmaps (valid are 1, length/4, length) */ + unsigned num_mips; + + /** number of lod values (valid are 1, length/4, length) */ unsigned num_lods; /** regular scalar float type */ @@ -283,6 +286,14 @@ struct lp_build_sample_context struct lp_type leveli_type; struct lp_build_context leveli_bld; + /** Float lod type */ + struct lp_type lodf_type; + struct lp_build_context lodf_bld; + + /** Int lod type */ + struct lp_type lodi_type; + struct lp_build_context lodi_bld; + /* Common dynamic state values */ LLVMValueRef row_stride_array; LLVMValueRef img_stride_array; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 7431388812d..c35b628270e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_mipmap_level_sizes(bld, ilevel0, &size0, &row_stride0_vec, &img_stride0_vec); - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); } else { @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm, - bld->levelf_bld.type, 256.0); - LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type; + bld->lodf_bld.type, 256.0); + LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type; struct lp_build_if_state if_ctx; LLVMValueRef need_lerp; unsigned num_quads = bld->coord_bld.type.length / 4; @@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* need_lerp = lod_fpart > 0 */ if (bld->num_lods == 1) { need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, - lod_fpart, bld->leveli_bld.zero, + lod_fpart, bld->lodi_bld.zero, "need_lerp"); } else { @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, * lod_fpart values have same sign. * We can however then skip the greater than comparison. */ - lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart, - bld->leveli_bld.zero); - need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart); + lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart, + bld->lodi_bld.zero); + need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart); } lp_build_if(&if_ctx, bld->gallivm, need_lerp); @@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_mipmap_level_sizes(bld, ilevel1, &size1, &row_stride1_vec, &img_stride1_vec); - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); } else { @@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, } else { unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods; - LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length); + LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length); LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH]; /* Take the LSB of lod_fpart */ @@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * some max probably could hack up the weights in the linear * path with selects to work for nearest. */ - if (bld->leveli_bld.type.length > 1) + if (bld->num_lods > 1) lod_positive = LLVMBuildExtractElement(builder, lod_positive, lp_build_const_int32(bld->gallivm, 0), ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 8ad3b9f246a..c686d82de57 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_mipmap_level_sizes(bld, ilevel0, &size0, &row_stride0_vec, &img_stride0_vec); - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); } else { @@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, /* need_lerp = lod_fpart > 0 */ if (bld->num_lods == 1) { need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, - lod_fpart, bld->levelf_bld.zero, + lod_fpart, bld->lodf_bld.zero, "need_lerp"); } else { @@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, * negative values which would screw up filtering if not all * lod_fpart values have same sign. */ - lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart, - bld->levelf_bld.zero); - need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type, + lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, + bld->lodf_bld.zero); + need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type, PIPE_FUNC_GREATER, - lod_fpart, bld->levelf_bld.zero); - need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp); + lod_fpart, bld->lodf_bld.zero); + need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp); } lp_build_if(&if_ctx, bld->gallivm, need_lerp); @@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_mipmap_level_sizes(bld, ilevel1, &size1, &row_stride1_vec, &img_stride1_vec); - if (bld->num_lods == 1) { + if (bld->num_mips == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); } else { @@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, if (bld->num_lods != bld->coord_type.length) lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, - bld->levelf_bld.type, + bld->lodf_bld.type, bld->texel_bld.type, lod_fpart); @@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context *bld, mip_filter, &lod_ipart, lod_fpart, lod_pos_or_zero); } else { - lod_ipart = bld->leveli_bld.zero; - *lod_pos_or_zero = bld->leveli_bld.zero; + lod_ipart = bld->lodi_bld.zero; + *lod_pos_or_zero = bld->lodi_bld.zero; + } + + if (bld->num_lods != bld->num_mips) { + /* only makes sense if there's just a single mip level */ + assert(bld->num_mips == 1); + lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1); } /* @@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, * some max probably could hack up the weights in the linear * path with selects to work for nearest. */ - if (bld->leveli_bld.type.length > 1) + if (bld->num_lods > 1) lod_positive = LLVMBuildExtractElement(builder, lod_positive, lp_build_const_int32(bld->gallivm, 0), ""); @@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, const LLVMValueRef *offsets, LLVMValueRef *colors_out) { - struct lp_build_context *perquadi_bld = &bld->leveli_bld; + struct lp_build_context *perquadi_bld = &bld->lodi_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; unsigned dims = bld->dims, chan; unsigned target = bld->static_texture_state->target; @@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, out_of_bounds = int_coord_bld->zero; if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) { - if (bld->num_lods != int_coord_bld->type.length) { + if (bld->num_mips != int_coord_bld->type.length) { ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, perquadi_bld->type, explicit_lod, 0); } @@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, out_of_bound_ret_zero ? &out_of_bounds : NULL); } else { - assert(bld->num_lods == 1); + assert(bld->num_mips == 1); if (bld->static_texture_state->target != PIPE_BUFFER) { ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, texture_unit); @@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, unsigned target = static_texture_state->target; unsigned dims = texture_dims(target); unsigned num_quads = type.length / 4; - unsigned mip_filter, i; + unsigned mip_filter, min_img_filter, mag_img_filter, i; struct lp_build_sample_context bld; struct lp_static_sampler_state derived_sampler_state = *static_sampler_state; LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); @@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm, debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter); } + min_img_filter = static_sampler_state->min_img_filter; + mag_img_filter = static_sampler_state->mag_img_filter; + + /* * This is all a bit complicated different paths are chosen for performance * reasons. @@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm, /* * There are other situations where at least the multiple int lods could be * avoided like min and max lod being equal. - * XXX if num_lods == 1 (for multiple quads) the level bld contexts will still - * have length 4. Because lod_selector is always using per quad calcs in this - * case, but minification etc. don't need to bother. This is very brittle though - * e.g. num_lods might be 1 but still have multiple positive_lod values! */ + bld.num_mips = bld.num_lods = 1; if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT && (explicit_lod || lod_bias || - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) && - ((is_fetch && target != PIPE_BUFFER) || - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE))) - bld.num_lods = type.length; + (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) { + if ((is_fetch && target != PIPE_BUFFER) || + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { + bld.num_mips = type.length; + bld.num_lods = type.length; + } + else if (!is_fetch && min_img_filter != mag_img_filter) { + bld.num_mips = 1; + bld.num_lods = type.length; + } + } /* TODO: for true scalar_lod should only use 1 lod value */ - else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) || + else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) || (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { + bld.num_mips = num_quads; bld.num_lods = num_quads; } - else { - bld.num_lods = 1; + else if (!is_fetch && min_img_filter != mag_img_filter) { + bld.num_mips = 1; + bld.num_lods = num_quads; } - bld.levelf_type = type; + + bld.lodf_type = type; /* we want native vector size to be able to use our intrinsics */ if (bld.num_lods != type.length) { - bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1; + /* TODO: this currently always has to be per-quad or per-element */ + bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1; + } + bld.lodi_type = lp_int_type(bld.lodf_type); + bld.levelf_type = bld.lodf_type; + if (bld.num_mips == 1) { + bld.levelf_type.length = 1; } bld.leveli_type = lp_int_type(bld.levelf_type); bld.float_size_type = bld.float_size_in_type; /* Note: size vectors may not be native. They contain minified w/h/d/_ values, * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */ - if (bld.num_lods > 1) { - bld.float_size_type.length = bld.num_lods == type.length ? - bld.num_lods * bld.float_size_in_type.length : + if (bld.num_mips > 1) { + bld.float_size_type.length = bld.num_mips == type.length ? + bld.num_mips * bld.float_size_in_type.length : type.length; } bld.int_size_type = lp_int_type(bld.float_size_type); @@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm, lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type); lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type); + lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type); + lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type); /* Get the dynamic state */ tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index); @@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm, * (It should be faster if we'd support avx2) */ if (num_quads == 1 || !use_aos) { - - if (num_quads > 1) { - if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { - LLVMValueRef index0 = lp_build_const_int32(gallivm, 0); - /* - * This parameter is the same for all quads could probably simplify. - */ - ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, ""); - } - } if (use_aos) { /* do sampling/filtering with fixed pt arithmetic */ lp_build_sample_aos(&bld, sampler_index, @@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm, bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type); bld4.texel_type = bld.texel_type; bld4.texel_type.length = 4; - bld4.levelf_type = type4; - /* we want native vector size to be able to use our intrinsics */ - bld4.levelf_type.length = 1; - bld4.leveli_type = lp_int_type(bld4.levelf_type); + bld4.num_mips = bld4.num_lods = 1; if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT && - (explicit_lod || lod_bias || - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) && - ((is_fetch && target != PIPE_BUFFER) || - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE))) - bld4.num_lods = type4.length; - else - bld4.num_lods = 1; + (explicit_lod || lod_bias || + (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) { + if ((is_fetch && target != PIPE_BUFFER) || + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { + bld4.num_mips = type4.length; + bld4.num_lods = type4.length; + } + else if (!is_fetch && min_img_filter != mag_img_filter) { + bld4.num_mips = 1; + bld4.num_lods = type4.length; + } + } - bld4.levelf_type = type4; /* we want native vector size to be able to use our intrinsics */ + bld4.lodf_type = type4; if (bld4.num_lods != type4.length) { + bld4.lodf_type.length = 1; + } + bld4.lodi_type = lp_int_type(bld4.lodf_type); + bld4.levelf_type = type4; + if (bld4.num_mips != type4.length) { bld4.levelf_type.length = 1; } bld4.leveli_type = lp_int_type(bld4.levelf_type); bld4.float_size_type = bld4.float_size_in_type; - if (bld4.num_lods > 1) { - bld4.float_size_type.length = bld4.num_lods == type4.length ? - bld4.num_lods * bld4.float_size_in_type.length : + if (bld4.num_mips > 1) { + bld4.float_size_type.length = bld4.num_mips == type4.length ? + bld4.num_mips * bld4.float_size_in_type.length : type4.length; } bld4.int_size_type = lp_int_type(bld4.float_size_type); @@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm, lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type); lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type); + lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type); + lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type); for (i = 0; i < num_quads; i++) { LLVMValueRef s4, t4, r4; @@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm, } } lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods); - ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods); + ilevel04 = bld.num_mips == 1 ? ilevel0 : + lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods); lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods); |