From 0b6554ba6f2aa8a771852566340c24205e406d02 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 27 Nov 2012 03:26:49 +0100 Subject: gallivm,llvmpipe: handle TXF (texelFetch) instruction, including offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also adds some code to handle per-quad lods for more than 4-wide fetches, because otherwise I'd have to integrate the texelFetch function into the splitting stuff... (but it is not used yet outside texelFetch). passes piglit fs-texelFetch-2D, fails fs-texelFetchOffset-2D due to I believe a test error (results are undefined for out-of-bounds fetches, we return whatever is at offset 0, whereas the test expects [0,0,0,1]). Texel offsets are only handled by texelFetch for now, though the interface can handle it for everything. Reviewed-by: José Fonseca --- src/gallium/auxiliary/draw/draw_llvm_sample.c | 9 +- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 268 +++++++++++++++++++--- src/gallium/auxiliary/gallivm/lp_bld_sample.h | 25 +- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 8 +- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 167 ++++++++++++-- src/gallium/auxiliary/gallivm/lp_bld_swizzle.c | 9 +- src/gallium/auxiliary/gallivm/lp_bld_swizzle.h | 3 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 60 +++++ src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 11 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 115 +++++++++- src/gallium/auxiliary/tgsi/tgsi_info.c | 1 + 11 files changed, 609 insertions(+), 67 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c index 0892d16bd6d..67d4e9339d6 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_sample.c +++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c @@ -171,9 +171,10 @@ static void draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, struct gallivm_state *gallivm, struct 
lp_type type, + boolean is_fetch, unsigned unit, - unsigned num_coords, const LLVMValueRef *coords, + const LLVMValueRef *offsets, const struct lp_derivatives *derivs, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ @@ -187,8 +188,10 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, &sampler->dynamic_state.static_state[unit], &sampler->dynamic_state.base, type, + is_fetch, unit, - num_coords, coords, + coords, + offsets, derivs, lod_bias, explicit_lod, texel); @@ -213,7 +216,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, lp_build_size_query_soa(gallivm, &sampler->dynamic_state.static_state[unit], &sampler->dynamic_state.base, - type, + type, unit, explicit_lod, sizes_out); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 0727fd2b91a..ea7dd95b78b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -186,8 +186,8 @@ lp_build_rho(struct lp_build_sample_context *bld, const struct lp_derivatives *derivs) { struct gallivm_state *gallivm = bld->gallivm; - struct lp_build_context *int_size_bld = &bld->int_size_bld; - struct lp_build_context *float_size_bld = &bld->float_size_bld; + struct lp_build_context *int_size_bld = &bld->int_size_in_bld; + struct lp_build_context *float_size_bld = &bld->float_size_in_bld; struct lp_build_context *float_bld = &bld->float_bld; struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *perquadf_bld = &bld->perquadf_bld; @@ -316,7 +316,7 @@ lp_build_rho(struct lp_build_sample_context *bld, } } rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, - perquadf_bld->type, rho); + perquadf_bld->type, rho, 0); } else { if (dims <= 1) { @@ -517,7 +517,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, else { if (explicit_lod) { lod = lp_build_pack_aos_scalars(bld->gallivm, 
bld->coord_bld.type, - perquadf_bld->type, explicit_lod); + perquadf_bld->type, explicit_lod, 0); } else { LLVMValueRef rho; @@ -562,7 +562,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* add shader lod bias */ if (lod_bias) { lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, - perquadf_bld->type, lod_bias); + perquadf_bld->type, lod_bias, 0); lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias"); } } @@ -725,7 +725,6 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, /** * Return pointer to a single mipmap level. - * \param data_array array of pointers to mipmap levels * \param level integer mipmap level */ LLVMValueRef @@ -743,6 +742,55 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld, return data_ptr; } +/** + * Return (per-pixel) offsets to mip levels. + * \param level integer mipmap level + */ +LLVMValueRef +lp_build_get_mip_offsets(struct lp_build_sample_context *bld, + LLVMValueRef level) +{ + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef indexes[2], offsets, offset1; + + indexes[0] = lp_build_const_int32(bld->gallivm, 0); + if (bld->num_lods == 1) { + indexes[1] = level; + offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, ""); + offset1 = LLVMBuildLoad(builder, offset1, ""); + offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1); + } + else if (bld->num_lods == bld->coord_bld.type.length / 4) { + unsigned i; + + offsets = bld->int_coord_bld.undef; + for (i = 0; i < bld->num_lods; i++) { + LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); + LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); + indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); + offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, ""); + offset1 = LLVMBuildLoad(builder, offset1, ""); + offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, ""); + } + offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, 
offsets, 0); + } + else { + unsigned i; + + assert (bld->num_lods == bld->coord_bld.type.length); + + offsets = bld->int_coord_bld.undef; + for (i = 0; i < bld->num_lods; i++) { + LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); + indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); + offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, ""); + offset1 = LLVMBuildLoad(builder, offset1, ""); + offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, ""); + } + } + return offsets; +} + /** * Codegen equivalent for u_minify(). @@ -780,12 +828,44 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, LLVMValueRef stride_array, LLVMValueRef level) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef indexes[2], stride; + LLVMValueRef indexes[2], stride, stride1; indexes[0] = lp_build_const_int32(bld->gallivm, 0); - indexes[1] = level; - stride = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); - stride = LLVMBuildLoad(builder, stride, ""); - stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); + if (bld->num_lods == 1) { + indexes[1] = level; + stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); + stride1 = LLVMBuildLoad(builder, stride1, ""); + stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1); + } + else if (bld->num_lods == bld->coord_bld.type.length / 4) { + LLVMValueRef stride1; + unsigned i; + + stride = bld->int_coord_bld.undef; + for (i = 0; i < bld->num_lods; i++) { + LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); + LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, i); + indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); + stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); + stride1 = LLVMBuildLoad(builder, stride1, ""); + stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, ""); + } + stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0); + } + else { + LLVMValueRef stride1; 
+ unsigned i; + + assert (bld->num_lods == bld->coord_bld.type.length); + + stride = bld->int_coord_bld.undef; + for (i = 0; i < bld->coord_bld.type.length; i++) { + LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); + indexes[1] = LLVMBuildExtractElement(builder, level, indexi, ""); + stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); + stride1 = LLVMBuildLoad(builder, stride1, ""); + stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, ""); + } + } return stride; } @@ -805,12 +885,102 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, const unsigned dims = bld->dims; LLVMValueRef ilevel_vec; - ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); - /* * Compute width, height, depth at mipmap level 'ilevel' */ - *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + if (bld->num_lods == 1) { + ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); + } + else { + LLVMValueRef int_size_vec; + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned num_quads = bld->coord_bld.type.length / 4; + unsigned i; + + if (bld->num_lods == num_quads) { + /* + * XXX: this should be #ifndef SANE_INSTRUCTION_SET. + * intel "forgot" the variable shift count instruction until avx2. + * A harmless 8x32 shift gets translated into 32 instructions + * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently + * unable to recognize if there are really just 2 different shift + * count values. So do the shift 4-wide before expansion. 
+ */ + struct lp_build_context bld4; + struct lp_type type4; + + type4 = bld->int_coord_bld.type; + type4.length = 4; + + lp_build_context_init(&bld4, bld->gallivm, type4); + + if (bld->dims == 1) { + assert(bld->int_size_in_bld.type.length == 1); + int_size_vec = lp_build_broadcast_scalar(&bld4, + bld->int_size); + } + else { + assert(bld->int_size_in_bld.type.length == 4); + int_size_vec = bld->int_size; + } + + for (i = 0; i < num_quads; i++) { + LLVMValueRef ileveli; + LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); + + ileveli = lp_build_extract_broadcast(bld->gallivm, + bld->perquadi_bld.type, + bld4.type, + ilevel, + indexi); + tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli); + } + /* + * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1, + * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise. + */ + *out_size = lp_build_concat(bld->gallivm, + tmp, + bld4.type, + num_quads); + } + else { + /* FIXME: this is terrible and results in _huge_ vector + * (for the dims > 1 case). + * Should refactor this (together with extract_image_sizes) and do + * something more useful. Could for instance if we have width,height + * with 4-wide vector pack all elements into a 8xi16 vector + * (on which we can still do useful math) instead of using a 16xi32 + * vector. + * FIXME: some callers can't handle this yet. + * For dims == 1 this will create [w0, w1, w2, w3, ...] vector. + * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector. + */ + assert(bld->num_lods == bld->coord_bld.type.length); + if (bld->dims == 1) { + assert(bld->int_size_bld.type.length == 1); + int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_size); + /* vector shift with variable shift count alert... 
*/ + *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel); + } + else { + LLVMValueRef ilevel1; + for (i = 0; i < bld->num_lods; i++) { + LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); + ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type, + bld->int_size_in_bld.type, ilevel, indexi); + tmp[i] = bld->int_size; + tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1); + } + int_size_vec = lp_build_concat(bld->gallivm, + tmp, + bld->int_size_in_bld.type, + bld->num_lods); + } + } + } if (dims >= 2) { *row_stride_vec = lp_build_get_level_stride_vec(bld, @@ -836,7 +1006,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, */ void lp_build_extract_image_sizes(struct lp_build_sample_context *bld, - struct lp_type size_type, + struct lp_build_context *size_bld, struct lp_type coord_type, LLVMValueRef size, LLVMValueRef *out_width, @@ -845,24 +1015,56 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, { const unsigned dims = bld->dims; LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); + struct lp_type size_type = size_bld->type; + + if (bld->num_lods == 1) { + *out_width = lp_build_extract_broadcast(bld->gallivm, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 0, 0)); + if (dims >= 2) { + *out_height = lp_build_extract_broadcast(bld->gallivm, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 1, 0)); + if (dims == 3) { + *out_depth = lp_build_extract_broadcast(bld->gallivm, + size_type, + coord_type, + size, + LLVMConstInt(i32t, 2, 0)); + } + } + } + else { + unsigned num_quads = bld->coord_bld.type.length / 4; - *out_width = lp_build_extract_broadcast(bld->gallivm, - size_type, - coord_type, - size, - LLVMConstInt(i32t, 0, 0)); - if (dims >= 2) { - *out_height = lp_build_extract_broadcast(bld->gallivm, - size_type, - coord_type, - size, - LLVMConstInt(i32t, 1, 0)); - if (dims == 3) { - *out_depth = lp_build_extract_broadcast(bld->gallivm, - 
size_type, - coord_type, - size, - LLVMConstInt(i32t, 2, 0)); + if (dims == 1) { + *out_width = size; + } + else if (bld->num_lods == num_quads) { + *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0); + if (dims >= 2) { + *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1); + if (dims == 3) { + *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2); + } + } + } + else { + assert(bld->num_lods == bld->coord_type.length); + *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type, + coord_type, size, 0); + if (dims >= 2) { + *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type, + coord_type, size, 1); + if (dims == 3) { + *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type, + coord_type, size, 2); + } + } } } } @@ -886,7 +1088,7 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld, LLVMValueRef depth; lp_build_extract_image_sizes(bld, - bld->float_size_type, + &bld->float_size_bld, bld->coord_type, flt_size, &width, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index d8a068d5497..7fc432cb4c2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -210,6 +210,9 @@ struct lp_build_sample_context /** SIMD vector width */ unsigned vector_width; + /** number of lod values (valid are 1, length/4, length) */ + unsigned num_lods; + /** regular scalar float type */ struct lp_type float_type; struct lp_build_context float_bld; @@ -230,10 +233,18 @@ struct lp_build_sample_context struct lp_build_context int_coord_bld; /** Unsigned integer texture size */ + struct lp_type int_size_in_type; + struct lp_build_context int_size_in_bld; + + /** Float incoming texture size */ + struct lp_type float_size_in_type; + struct lp_build_context float_size_in_bld; + + /** Unsigned integer texture size (might be per quad) */ struct lp_type int_size_type; struct lp_build_context int_size_bld; - /** Unsigned 
integer texture size */ + /** Float texture size (might be per quad) */ struct lp_type float_size_type; struct lp_build_context float_size_bld; @@ -298,6 +309,7 @@ texture_dims(enum pipe_texture_target tex) { switch (tex) { case PIPE_TEXTURE_1D: + case PIPE_BUFFER: return 1; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -355,6 +367,11 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld, LLVMValueRef level); +LLVMValueRef +lp_build_get_mip_offsets(struct lp_build_sample_context *bld, + LLVMValueRef level); + + void lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, LLVMValueRef ilevel, @@ -365,7 +382,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, void lp_build_extract_image_sizes(struct lp_build_sample_context *bld, - struct lp_type size_type, + struct lp_build_context *size_bld, struct lp_type coord_type, LLVMValueRef size, LLVMValueRef *out_width, @@ -418,9 +435,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, struct lp_type fp_type, + boolean is_fetch, unsigned unit, - unsigned num_coords, const LLVMValueRef *coords, + const LLVMValueRef *offsets, const struct lp_derivatives *derivs, LLVMValueRef lod_bias, LLVMValueRef explicit_lod, @@ -448,7 +466,6 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, void lp_build_sample_nop(struct gallivm_state *gallivm, struct lp_type type, - unsigned num_coords, const LLVMValueRef *coords, LLVMValueRef texel_out[4]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index d81033f83a0..236b68bb0ce 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -539,7 +539,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); lp_build_extract_image_sizes(bld, - 
bld->int_size_type, + &bld->int_size_bld, bld->int_coord_type, int_size, &width_vec, @@ -661,7 +661,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); lp_build_extract_image_sizes(bld, - bld->float_size_type, + &bld->float_size_bld, bld->coord_type, flt_size, &width_vec, @@ -994,7 +994,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); lp_build_extract_image_sizes(bld, - bld->int_size_type, + &bld->int_size_bld, bld->int_coord_type, int_size, &width_vec, @@ -1175,7 +1175,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); lp_build_extract_image_sizes(bld, - bld->float_size_type, + &bld->float_size_bld, bld->coord_type, flt_size, &width_vec, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 00a5b187bcb..daa49506ca5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -610,7 +610,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef x, y, z; lp_build_extract_image_sizes(bld, - bld->int_size_type, + &bld->int_size_bld, bld->int_coord_type, size, &width_vec, &height_vec, &depth_vec); @@ -618,7 +618,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, flt_size = lp_build_int_to_float(&bld->float_size_bld, size); lp_build_extract_image_sizes(bld, - bld->float_size_type, + &bld->float_size_bld, bld->coord_type, flt_size, &flt_width_vec, &flt_height_vec, &flt_depth_vec); @@ -695,7 +695,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, int chan; lp_build_extract_image_sizes(bld, - bld->int_size_type, + &bld->int_size_bld, bld->int_coord_type, size, &width_vec, &height_vec, &depth_vec); @@ -703,7 +703,7 @@ 
lp_build_sample_image_linear(struct lp_build_sample_context *bld, flt_size = lp_build_int_to_float(&bld->float_size_bld, size); lp_build_extract_image_sizes(bld, - bld->float_size_type, + &bld->float_size_bld, bld->coord_type, flt_size, &flt_width_vec, &flt_height_vec, &flt_depth_vec); @@ -1157,6 +1157,120 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } +/** + * Texel fetch function. + * In contrast to general sampling there is no filtering, no coord minification, + * lod (if any) is always explicit uint, coords are uints (in terms of texel units) + * directly to be applied to the selected mip level (after adding texel offsets). + * This function handles texel fetch for all targets where texel fetch is supported + * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too). + */ +static void +lp_build_fetch_texel(struct lp_build_sample_context *bld, + unsigned unit, + const LLVMValueRef *coords, + LLVMValueRef explicit_lod, + const LLVMValueRef *offsets, + LLVMValueRef *colors_out) +{ + struct lp_build_context *perquadi_bld = &bld->perquadi_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + unsigned dims = bld->dims, chan; + LLVMValueRef size, ilevel; + LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL; + LLVMValueRef x = coords[0], y = coords[1], z = coords[2]; + LLVMValueRef width, height, depth, i, j; + LLVMValueRef offset, out_of_bounds, out1; + + /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */ + if (explicit_lod && bld->static_state->target != PIPE_BUFFER) { + /* could also avoid this if there are no mipmaps */ + /* XXX temporary hack until ordinary sampling handles per-quad lod the same */ + bld->num_lods = bld->coord_type.length / 4; + bld->float_size_type = bld->float_size_in_type; + bld->float_size_type.length = bld->num_lods > 1 ? 
bld->coord_type.length : + bld->float_size_in_type.length; + bld->int_size_type = lp_int_type(bld->float_size_type); + lp_build_context_init(&bld->int_size_bld, bld->gallivm, bld->int_size_type); + lp_build_context_init(&bld->float_size_bld, bld->gallivm, bld->float_size_type); + + ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, + perquadi_bld->type, explicit_lod, 0); + lp_build_nearest_mip_level(bld, unit, ilevel, &ilevel); + } + else { + bld->num_lods = 1; + ilevel = lp_build_const_int32(bld->gallivm, 0); + } + lp_build_mipmap_level_sizes(bld, ilevel, + &size, + &row_stride_vec, &img_stride_vec); + lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type, + size, &width, &height, &depth); + + /* This is a lot like border sampling */ + if (offsets[0]) { + /* XXX coords are really unsigned, offsets are signed */ + x = lp_build_add(int_coord_bld, x, offsets[0]); + } + out_of_bounds = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); + out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); + out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); + + if (dims >= 2) { + if (offsets[1]) { + y = lp_build_add(int_coord_bld, y, offsets[1]); + } + out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); + out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); + out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); + out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); + + if (dims >= 3) { + if (offsets[2]) { + z = lp_build_add(int_coord_bld, z, offsets[2]); + } + out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); + out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); + out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); + out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); + } + } + + lp_build_sample_offset(int_coord_bld, + bld->format_desc, + x, y, z, row_stride_vec, img_stride_vec, + 
&offset, &i, &j); + + if (bld->static_state->target != PIPE_BUFFER) { + offset = lp_build_add(int_coord_bld, offset, + lp_build_get_mip_offsets(bld, ilevel)); + } + + offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds); + + lp_build_fetch_rgba_soa(bld->gallivm, + bld->format_desc, + bld->texel_type, + bld->base_ptr, offset, + i, j, + colors_out); + + if (0) { + /* + * Not needed except for ARB_robust_buffer_access_behavior. + * Could use min/max above instead of out-of-bounds comparisons + * (in fact cast to unsigned and min only is sufficient) + * if we don't care about the result returned for out-of-bounds. + */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds, + bld->texel_bld.zero, colors_out[chan]); + } + } +} + + /** * Do shadow test/comparison. * \param p the texcoord Z (aka R, aka P) component @@ -1209,7 +1323,6 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, void lp_build_sample_nop(struct gallivm_state *gallivm, struct lp_type type, - unsigned num_coords, const LLVMValueRef *coords, LLVMValueRef texel_out[4]) { @@ -1227,6 +1340,7 @@ lp_build_sample_nop(struct gallivm_state *gallivm, * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. * \param type vector float type to use for coords, etc. + * \param is_fetch if this is a texel fetch instruction. 
* \param derivs partial derivatives of (s,t,r,q) with respect to x and y */ void @@ -1234,9 +1348,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm, const struct lp_sampler_static_state *static_state, struct lp_sampler_dynamic_state *dynamic_state, struct lp_type type, + boolean is_fetch, unsigned unit, - unsigned num_coords, const LLVMValueRef *coords, + const LLVMValueRef *offsets, const struct lp_derivatives *derivs, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ @@ -1272,20 +1387,28 @@ lp_build_sample_soa(struct gallivm_state *gallivm, bld.int_type = lp_type_int(32); bld.coord_type = type; bld.int_coord_type = lp_int_type(type); - bld.float_size_type = lp_type_float(32); - bld.float_size_type.length = dims > 1 ? 4 : 1; - bld.int_size_type = lp_int_type(bld.float_size_type); + bld.float_size_in_type = lp_type_float(32); + bld.float_size_in_type.length = dims > 1 ? 4 : 1; + bld.int_size_in_type = lp_int_type(bld.float_size_in_type); bld.texel_type = type; bld.perquadf_type = type; /* we want native vector size to be able to use our intrinsics */ bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1; bld.perquadi_type = lp_int_type(bld.perquadf_type); + bld.num_lods = 1; + bld.float_size_type = bld.float_size_in_type; + bld.float_size_type.length = bld.num_lods > 1 ? 
type.length : + bld.float_size_in_type.length; + bld.int_size_type = lp_int_type(bld.float_size_type); + lp_build_context_init(&bld.float_bld, gallivm, bld.float_type); lp_build_context_init(&bld.float_vec_bld, gallivm, type); lp_build_context_init(&bld.int_bld, gallivm, bld.int_type); lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type); lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type); + lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type); + lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type); lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type); lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type); lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); @@ -1311,7 +1434,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, bld.int_size = tex_width; } else { - bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef, + bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef, tex_width, LLVMConstInt(i32t, 0, 0), ""); if (dims >= 2) { bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, @@ -1327,7 +1450,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm, /* For debug: no-op texture sampling */ lp_build_sample_nop(gallivm, bld.texel_type, - num_coords, coords, texel_out); } @@ -1352,6 +1474,18 @@ lp_build_sample_soa(struct gallivm_state *gallivm, static_state->wrap_t); } + if (is_fetch) { + lp_build_fetch_texel(&bld, unit, coords, + explicit_lod, offsets, + texel_out); + + if (static_state->target != PIPE_BUFFER) { + apply_sampler_swizzle(&bld, texel_out); + } + + return; + } + lp_build_sample_common(&bld, unit, &s, &t, &r, derivs, lod_bias, explicit_lod, @@ -1450,20 +1584,25 @@ lp_build_sample_soa(struct gallivm_state *gallivm, bld4.int_type = lp_type_int(32); bld4.coord_type = type4; bld4.int_coord_type = lp_int_type(type4); - bld4.float_size_type = lp_type_float(32); - bld4.float_size_type.length 
= dims > 1 ? 4 : 1; - bld4.int_size_type = lp_int_type(bld4.float_size_type); + bld4.float_size_in_type = lp_type_float(32); + bld4.float_size_in_type.length = dims > 1 ? 4 : 1; + bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type); + bld4.float_size_type = bld4.float_size_in_type; + bld4.int_size_type = bld4.int_size_in_type; bld4.texel_type = type4; bld4.perquadf_type = type4; /* we want native vector size to be able to use our intrinsics */ bld4.perquadf_type.length = 1; bld4.perquadi_type = lp_int_type(bld4.perquadf_type); + bld4.num_lods = 1; lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type); lp_build_context_init(&bld4.float_vec_bld, gallivm, type4); lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type); lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type); lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type); + lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type); + lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type); lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type); lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type); lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 201a3487588..3d70252e75a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -554,15 +554,16 @@ lp_build_transpose_aos(struct gallivm_state *gallivm, /** - * Pack first element of aos values, + * Pack n-th element of aos values, * pad out to destination size. - * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _ + * i.e. 
x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _ */ LLVMValueRef lp_build_pack_aos_scalars(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, - const LLVMValueRef src) + const LLVMValueRef src, + unsigned channel) { LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef undef = LLVMGetUndef(i32t); @@ -574,7 +575,7 @@ lp_build_pack_aos_scalars(struct gallivm_state *gallivm, assert(num_src <= num_dst); for (i = 0; i < num_src; i++) { - shuffles[i] = LLVMConstInt(i32t, i * 4, 0); + shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0); } for (i = num_src; i < num_dst; i++) { shuffles[i] = undef; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 0bf4ce988a2..c49d9167231 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -117,7 +117,8 @@ LLVMValueRef lp_build_pack_aos_scalars(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, - const LLVMValueRef src); + const LLVMValueRef src, + unsigned channel); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 680c85f843c..a4fea7d2961 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -334,6 +334,66 @@ lp_build_emit_fetch( } + +LLVMValueRef +lp_build_emit_fetch_texoffset( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + unsigned tex_off_op, + const unsigned chan_index) +{ + const struct tgsi_texture_offset *off = &inst->TexOffsets[tex_off_op]; + struct tgsi_full_src_register reg; + unsigned swizzle; + LLVMValueRef res; + enum tgsi_opcode_type stype = TGSI_TYPE_SIGNED; + + /* convert offset "register" to ordinary register so can use normal emit funcs */ + memset(&reg, 0, sizeof(reg)); + reg.Register.File = off->File; + reg.Register.Index = off->Index; + 
reg.Register.SwizzleX = off->SwizzleX; + reg.Register.SwizzleY = off->SwizzleY; + reg.Register.SwizzleZ = off->SwizzleZ; + + if (chan_index == LP_CHAN_ALL) { + swizzle = ~0; + } else { + swizzle = tgsi_util_get_src_register_swizzle(&reg.Register, chan_index); + if (swizzle > 2) { + assert(0 && "invalid swizzle in emit_fetch_texoffset()"); + return bld_base->base.undef; + } + } + + assert(off->Index <= bld_base->info->file_max[off->File]); + + if (bld_base->emit_fetch_funcs[off->File]) { + res = bld_base->emit_fetch_funcs[off->File](bld_base, &reg, stype, + swizzle); + } else { + assert(0 && "invalid src register in emit_fetch_texoffset()"); + return bld_base->base.undef; + } + + /* + * Swizzle the argument + */ + + if (swizzle == ~0) { + res = bld_base->emit_swizzle(bld_base, res, + off->SwizzleX, + off->SwizzleY, + off->SwizzleZ, + /* there's no 4th channel */ + off->SwizzleX); + } + + return res; + +} + + boolean lp_build_tgsi_llvm( struct lp_build_tgsi_context * bld_base, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index e292420a61a..16d2ed9e6f7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -172,9 +172,10 @@ struct lp_build_sampler_soa (*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler, struct gallivm_state *gallivm, struct lp_type type, + boolean is_fetch, unsigned unit, - unsigned num_coords, const LLVMValueRef *coords, + const LLVMValueRef *offsets, const struct lp_derivatives *derivs, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ @@ -555,6 +556,14 @@ lp_build_emit_fetch( unsigned src_op, const unsigned chan_index); + +LLVMValueRef +lp_build_emit_fetch_texoffset( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + unsigned tex_off_op, + const unsigned chan_index); + boolean lp_build_tgsi_llvm( struct lp_build_tgsi_context * bld_base, diff --git 
a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 85a4401b534..2afdd3027e0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1146,7 +1146,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, unsigned unit; LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; - LLVMValueRef coords[3]; + LLVMValueRef coords[4]; + LLVMValueRef offsets[3] = { NULL }; struct lp_derivatives derivs; unsigned num_coords; unsigned dims; @@ -1225,7 +1226,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); } - for (i = num_coords; i < 3; i++) { + for (i = num_coords; i < 4; i++) { coords[i] = bld->bld_base.base.undef; } @@ -1285,15 +1286,111 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, unit = inst->Src[1].Register.Index; } + /* some advanced gather instructions (txgo) would require 4 offsets */ + if (inst->Texture.NumOffsets == 1) { + unsigned dim; + for (dim = 0; dim < dims; dim++) { + offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim ); + } + } + bld->sampler->emit_fetch_texel(bld->sampler, bld->bld_base.base.gallivm, bld->bld_base.base.type, - unit, num_coords, coords, + FALSE, + unit, coords, + offsets, &derivs, lod_bias, explicit_lod, texel); } +static void +emit_txf( struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + LLVMValueRef *texel) +{ + unsigned unit; + LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); + LLVMValueRef explicit_lod = NULL; + LLVMValueRef coords[3]; + LLVMValueRef offsets[3] = { NULL }; + struct lp_derivatives derivs; + unsigned num_coords; + unsigned dims; + unsigned i; + + if (!bld->sampler) { + _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); + for (i = 0; i < 4; i++) { + texel[i] = coord_undef; 
+ } + return; + } + + derivs.ddx_ddy[0] = coord_undef; + derivs.ddx_ddy[1] = coord_undef; + + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_BUFFER: + num_coords = 1; + dims = 1; + break; + case TGSI_TEXTURE_1D_ARRAY: + num_coords = 2; + dims = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + num_coords = 2; + dims = 2; + break; + case TGSI_TEXTURE_2D_ARRAY: + num_coords = 3; + dims = 2; + break; + case TGSI_TEXTURE_3D: + num_coords = 3; + dims = 3; + break; + default: + assert(0); + return; + } + + /* always have lod except for buffers ? */ + if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) { + explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); + } + + for (i = 0; i < num_coords; i++) { + coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i ); + } + for (i = num_coords; i < 3; i++) { + coords[i] = coord_undef; + } + + unit = inst->Src[1].Register.Index; + + if (inst->Texture.NumOffsets == 1) { + unsigned dim; + for (dim = 0; dim < dims; dim++) { + offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim ); + } + } + + bld->sampler->emit_fetch_texel(bld->sampler, + bld->bld_base.base.gallivm, + bld->bld_base.base.type, + TRUE, + unit, coords, + offsets, + &derivs, + NULL, explicit_lod, + texel); +} + static void emit_txq( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, @@ -1755,6 +1852,17 @@ txq_emit( emit_txq(bld, emit_data->inst, emit_data->output); } +static void +txf_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + + emit_txf(bld, emit_data->inst, emit_data->output); +} + static void cal_emit( const struct lp_build_tgsi_action * action, @@ -2126,6 +2234,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; 
bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; + bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit; lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 51ca373b6ba..458bc69d169 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -293,6 +293,7 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_USHR: case TGSI_OPCODE_SHL: case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXF: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_MOD: case TGSI_OPCODE_I2F: -- cgit v1.2.3