diff options
author | Keith Whitwell <[email protected]> | 2010-10-17 19:03:42 -0700 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2010-10-17 19:09:42 -0700 |
commit | 0072acd447dc6be652e63752e50215c3105322c8 (patch) | |
tree | 847d1763b54772d336a04e606f8248291c3092b7 /src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | |
parent | 543fb77ddece7e1806e8eaa0d65bb2a945ef9a75 (diff) | |
parent | ca2b2ac131933b4171b519813df1aaa3a81621cd (diff) |
Merge remote branch 'origin/master' into lp-setup-llvm
Conflicts:
src/gallium/drivers/llvmpipe/lp_setup_coef.c
src/gallium/drivers/llvmpipe/lp_setup_coef.h
src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
src/gallium/drivers/llvmpipe/lp_setup_point.c
src/gallium/drivers/llvmpipe/lp_setup_tri.c
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.h
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 621 |
1 files changed, 289 insertions, 332 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 7e064900e77..d3e3b242af4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -45,6 +45,7 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" @@ -80,20 +81,21 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef *out_offset, LLVMValueRef *out_i) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if(is_pot) coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ + else { + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + coord = LLVMBuildAdd(bld->builder, coord, bias, ""); coord = LLVMBuildURem(bld->builder, coord, length, ""); + } break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: @@ -111,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, assert(0); } - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); } @@ -144,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef *i0, LLVMValueRef *i1) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef length_minus_one; LLVMValueRef lmask, umask, mask; @@ -186,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, * multiplication. */ - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; + *i0 = int_coord_bld->zero; + *i1 = int_coord_bld->zero; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); @@ -197,17 +198,18 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); } else { - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + coord0 = LLVMBuildAdd(bld->builder, coord0, bias, ""); coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); } mask = lp_build_compare(bld->builder, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), + lp_build_add(int_coord_bld, *offset0, stride), mask, ""); break; @@ -222,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); + *offset1 = lp_build_add(int_coord_bld, *offset0, LLVMBuildAnd(bld->builder, stride, mask, "")); break; @@ -236,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; + *offset0 = int_coord_bld->zero; + *offset1 = int_coord_bld->zero; break; } } @@ -250,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -262,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, t_ipart, r_ipart; LLVMValueRef x_stride; LLVMValueRef x_offset, offset; @@ -280,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -321,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* get pixel, row, image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); /* Do texcoord wrapping, compute texel offset */ @@ -340,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_t, &y_offset, &y_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); if (dims >= 3) { LLVMValueRef z_offset; lp_build_sample_wrap_nearest_int(bld, @@ -349,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_r, &z_offset, &z_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; /* The r coord is the cube face in [0,5] */ - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } } @@ -414,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -426,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); + const unsigned dims = bld->dims; LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; @@ -455,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); + if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -489,10 +495,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* subtract 0.5 (add -128) */ i32_c128 = lp_build_const_int_vec(i32.type, -128); - if (!bld->static_state->force_nearest_s) { - s = LLVMBuildAdd(builder, s, i32_c128, ""); - } - if (dims >= 2 && !bld->static_state->force_nearest_t) { + s = LLVMBuildAdd(builder, s, i32_c128, ""); + if (dims >= 2) { t = LLVMBuildAdd(builder, t, i32_c128, ""); } if (dims >= 3) { @@ -516,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->int_coord_bld.type, bld->format_desc->block.bits/8); y_stride = row_stride_vec; z_stride = img_stride_vec; @@ -547,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, for (z = 0; z < 2; z++) { for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][0][x] = lp_build_add(&bld->int_coord_bld, offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][1][x] = lp_build_add(&bld->int_coord_bld, offset[z][1][x], y_offset1); } } @@ -565,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, &z_subcoord[0], &z_subcoord[1]); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[1][y][x] = lp_build_add(&bld->int_coord_bld, offset[1][y][x], z_offset1); } } } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { /* The r coord is the cube face in [0,5] */ - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset); } } @@ -709,82 +713,56 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* * Linear interpolation with 8.8 fixed point. */ - if (bld->static_state->force_nearest_s) { - /* special case 1-D lerp */ - packed_lo = lp_build_lerp(&h16, - t_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - t_fpart_hi, - neighbors_hi[0][1][0], - neighbors_hi[0][1][0]); - } - else if (bld->static_state->force_nearest_t) { - /* special case 1-D lerp */ + if (dims == 1) { + /* 1-D lerp */ packed_lo = lp_build_lerp(&h16, - s_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); + s_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1]); packed_hi = lp_build_lerp(&h16, - s_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1]); + s_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1]); } else { - /* general 1/2/3-D lerping */ - if (dims == 1) { - packed_lo = lp_build_lerp(&h16, - s_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1]); - - packed_hi = lp_build_lerp(&h16, - s_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1]); - } - else { - /* 2-D lerp */ - packed_lo = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[0][0][0], - neighbors_lo[0][0][1], - neighbors_lo[0][1][0], - neighbors_lo[0][1][1]); - - packed_hi = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[0][0][0], - neighbors_hi[0][0][1], - neighbors_hi[0][1][0], - neighbors_hi[0][1][1]); - - if (dims >= 3) { - LLVMValueRef packed_lo2, packed_hi2; - - /* lerp in the second z slice */ - packed_lo2 = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[1][0][0], - neighbors_lo[1][0][1], - neighbors_lo[1][1][0], - neighbors_lo[1][1][1]); - - packed_hi2 = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[1][0][0], - neighbors_hi[1][0][1], - neighbors_hi[1][1][0], - neighbors_hi[1][1][1]); - /* interp between two z slices */ - packed_lo = lp_build_lerp(&h16, r_fpart_lo, - packed_lo, packed_lo2); - packed_hi = lp_build_lerp(&h16, r_fpart_hi, - packed_hi, packed_hi2); - } + /* 2-D lerp */ + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1], + neighbors_lo[0][1][0], + neighbors_lo[0][1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0][0], + neighbors_hi[0][0][1], + neighbors_hi[0][1][0], + neighbors_hi[0][1][1]); + + if (dims >= 3) { + LLVMValueRef packed_lo2, packed_hi2; + + /* lerp in the second z slice */ + packed_lo2 = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[1][0][0], + neighbors_lo[1][0][1], + neighbors_lo[1][1][0], + neighbors_lo[1][1][1]); + + packed_hi2 = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[1][0][0], + neighbors_hi[1][0][1], + neighbors_hi[1][1][0], + neighbors_hi[1][1][1]); + /* interp between two z slices */ + packed_lo = lp_build_lerp(&h16, r_fpart_lo, + packed_lo, packed_lo2); + packed_hi = lp_build_lerp(&h16, r_fpart_hi, + packed_hi, packed_hi2); } } @@ -806,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef width0_vec, - LLVMValueRef width1_vec, - LLVMValueRef height0_vec, - LLVMValueRef height1_vec, - LLVMValueRef depth0_vec, - LLVMValueRef depth1_vec, - LLVMValueRef row_stride0_vec, - LLVMValueRef row_stride1_vec, - LLVMValueRef img_stride0_vec, - LLVMValueRef img_stride1_vec, - LLVMValueRef data_ptr0, - LLVMValueRef data_ptr1, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef colors_lo_var, + LLVMValueRef colors_hi_var) { + LLVMBuilderRef builder = bld->builder; + LLVMValueRef size0; + LLVMValueRef size1; + LLVMValueRef row_stride0_vec; + LLVMValueRef row_stride1_vec; + LLVMValueRef img_stride0_vec; + LLVMValueRef img_stride1_vec; + LLVMValueRef data_ptr0; + LLVMValueRef data_ptr1; LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; + + /* sample the first mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel0, + &size0, + &row_stride0_vec, &img_stride0_vec); + data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { - /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); - - /* sample the first mipmap level */ lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } + /* Store the first level's colors in the output variables */ + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - struct lp_build_context h16; - lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16)); - - *colors_lo = lp_build_lerp(&h16, lod_fpart, - colors0_lo, colors1_lo); - *colors_hi = lp_build_lerp(&h16, lod_fpart, - colors0_hi, colors1_hi); - } - else { - /* use first/only level's colors */ - *colors_lo = colors0_lo; - *colors_hi = colors0_hi; + LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0); + LLVMTypeRef i32_type = LLVMIntType(32); + struct lp_build_if_state if_ctx; + LLVMValueRef need_lerp; + + lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); + lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); + + /* need_lerp = lod_fpart > 0 */ + need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, + lod_fpart, LLVMConstNull(i32_type), + "need_lerp"); + + lp_build_if(&if_ctx, builder, need_lerp); + { + struct lp_build_context h16_bld; + + lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); + + /* sample the second mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel1, + &size1, + &row_stride1_vec, &img_stride1_vec); + data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + else { + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + + /* interpolate samples from the two mipmap levels */ + + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); + lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); + +#if HAVE_LLVM == 0x208 + /* This is a work-around for a bug in LLVM 2.8. + * Evidently, something goes wrong in the construction of the + * lod_fpart short[8] vector. Adding this no-effect shuffle seems + * to force the vector to be properly constructed. + * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). + */ + { + LLVMValueRef shuffles[8], shuffle; + int i; + assert(h16_bld.type.length <= Elements(shuffles)); + for (i = 0; i < h16_bld.type.length; i++) + shuffles[i] = lp_build_const_int32(2 * (i & 1)); + shuffle = LLVMConstVector(shuffles, h16_bld.type.length); + lod_fpart = LLVMBuildShuffleVector(builder, + lod_fpart, lod_fpart, + shuffle, ""); + } +#endif + + colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, + colors0_lo, colors1_lo); + colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, + colors0_hi, colors1_hi); + + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + } + lp_build_endif(&if_ctx); } } @@ -896,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef texel_out[4]) { - struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; LLVMBuilderRef builder = bld->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + const unsigned dims = bld->dims; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; - LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; - LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; - LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; - LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; - LLVMValueRef data_ptr0, data_ptr1 = NULL; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; - struct lp_build_context h16; - LLVMTypeRef h16_vec_type; + struct lp_build_context h16_bld; + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0); /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); @@ -935,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* make 16-bit fixed-pt builder context */ - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - h16_vec_type = lp_build_vec_type(h16.type); - + lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16)); /* cube face selection, compute pre-face coords, etc. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { @@ -949,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; ddy = face_ddy; } - /* * Compute the level of detail (float). */ @@ -970,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = i32t_zero; } /* * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 - * If mipfilter=linear, also compute the weight between the two - * mipmap levels: lod_fpart */ switch (mip_filter) { default: @@ -991,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + ilevel0 = i32t_zero; } break; case PIPE_TEX_MIPFILTER_NEAREST: - assert(lod); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); break; case PIPE_TEX_MIPFILTER_LINEAR: - { - LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); - LLVMValueRef i255 = lp_build_const_int32(255); - LLVMTypeRef i16_type = LLVMIntType(16); - - assert(lod); - - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); - lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); - lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); - lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); - lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); - lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); - - /* the lod_fpart values will be fixed pt values in [0,1) */ - } + assert(lod_ipart); + assert(lod_fpart); + lp_build_linear_mip_levels(bld, unit, + lod_ipart, &lod_fpart, + &ilevel0, &ilevel1); break; } - /* compute image size(s) of source mipmap level(s) */ - lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, - ilevel0, ilevel1, - row_stride_array, img_stride_array, - &width0_vec, &width1_vec, - &height0_vec, &height1_vec, - &depth0_vec, &depth1_vec, - &row_stride0_vec, &row_stride1_vec, - &img_stride0_vec, &img_stride1_vec); - /* - * Get pointer(s) to image data for mipmap level(s). + * Get/interpolate texture colors. */ - data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); - } + packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo"); + packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi"); - /* - * Get/interpolate texture colors. - */ if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } else { /* Emit conditional to choose min image filter or mag image filter * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow_ctx); - - packed_lo = LLVMGetUndef(h16_vec_type); - packed_hi = LLVMGetUndef(h16_vec_type); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); - lp_build_flow_scope_declare(flow_ctx, &packed_lo); - lp_build_flow_scope_declare(flow_ctx, &packed_hi); - - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(builder, LLVMRealUGE, - lod, float_bld->zero, ""); - - lp_build_if(&if_ctx, flow_ctx, builder, minify); + lp_build_if(&if_ctx, builder, minify); { /* Use the minification filter */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } lp_build_else(&if_ctx); { /* Use the magnification filter */ - lp_build_sample_mipmap(bld, mag_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + mag_filter, PIPE_TEX_MIPFILTER_NONE, + s, t, r, + i32t_zero, NULL, NULL, + packed_lo, packed_hi); } lp_build_endif(&if_ctx); - - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); } - /* combine 'packed_lo', 'packed_hi' into 'packed' */ - { - struct lp_build_context h16, u8n; - - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - packed = lp_build_pack2(builder, h16.type, u8n.type, - packed_lo, packed_hi); - } + /* + * combine the values stored in 'packed_lo' and 'packed_hi' variables + * into 'packed' + */ + packed = lp_build_pack2(builder, + h16_bld.type, lp_type_unorm(8), + LLVMBuildLoad(builder, packed_lo, ""), + LLVMBuildLoad(builder, packed_hi, "")); /* * Convert to SoA and swizzle. |