diff options
author | Marek Olšák <[email protected]> | 2018-09-22 21:17:52 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2018-10-16 17:23:25 -0400 |
commit | bfc795670ec82af5767cf360806e32322664604e (patch) | |
tree | b6d2672e2e4cf42bfa1d7e10ca05959a68f92d30 | |
parent | ea039f789d9b54e1bd1d644b6a29863ca3500314 (diff) |
ac: add helpers for fast integer division by a constant
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 61 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 17 |
2 files changed, 78 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index d0bcfe2dfb3..2d78ca1b52a 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -594,6 +594,67 @@ ac_build_fdiv(struct ac_llvm_context *ctx, return ret; } +/* Emit IR for a fast unsigned division of num by a constant, expressed as shifts and one widening multiply: trunc32((zext64(num >> pre_shift) x zext64(multiplier) + zext64(increment)) >> 32) >> post_shift. The shift amounts and multiplier are supplied by the caller as LLVM values. See fast_idiv_by_const.h. */ +/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */ +LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef multiplier, + LLVMValueRef pre_shift, + LLVMValueRef post_shift, + LLVMValueRef increment) +{ + LLVMBuilderRef builder = ctx->builder; + + num = LLVMBuildLShr(builder, num, pre_shift, ""); /* logical shift right by pre_shift */ + num = LLVMBuildMul(builder, + LLVMBuildZExt(builder, num, ctx->i64, ""), + LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); /* both factors are zero-extended to i64 before the multiply */ + num = LLVMBuildAdd(builder, num, + LLVMBuildZExt(builder, increment, ctx->i64, ""), ""); /* with increment == multiplier this equals (num + 1) x multiplier, evaluated in 64 bits */ + num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); /* keep bits 32..63 of the 64-bit value */ + num = LLVMBuildTrunc(builder, num, ctx->i32, ""); /* narrow back to i32 */ + return LLVMBuildLShr(builder, num, post_shift, ""); +} + +/* Variant of ac_build_fast_udiv that adds increment to the pre-shifted numerator before it is widened and multiplied, rather than adding it to the 64-bit product. The add is built with LLVMBuildNUWAdd. See fast_idiv_by_const.h. */ +/* If num != UINT_MAX, this more efficient version can be used. */ +/* Set: increment = util_fast_udiv_info::increment; */ +LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef multiplier, + LLVMValueRef pre_shift, + LLVMValueRef post_shift, + LLVMValueRef increment) +{ + LLVMBuilderRef builder = ctx->builder; + + num = LLVMBuildLShr(builder, num, pre_shift, ""); /* logical shift right by pre_shift */ + num = LLVMBuildNUWAdd(builder, num, increment, ""); /* add done at num's own width, before the zext; NOTE(review): the NUW flag is presumably why num must not be UINT_MAX - confirm */ + num = LLVMBuildMul(builder, + LLVMBuildZExt(builder, num, ctx->i64, ""), + LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); /* both factors are zero-extended to i64 before the multiply */ + num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); /* keep bits 32..63 of the product */ + num = LLVMBuildTrunc(builder, num, ctx->i32, ""); /* narrow back to i32 */ + return LLVMBuildLShr(builder, num, post_shift, ""); +} + +/* Reduced variant that takes neither pre_shift nor increment; it emits only trunc32((zext64(num) x zext64(multiplier)) >> 32) >> post_shift. See fast_idiv_by_const.h. */ +/* Both operands must fit in 31 bits and the divisor must not be 1. This precondition is the caller's responsibility; the function emits no check for it. 
*/ +LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef multiplier, + LLVMValueRef post_shift) +{ + LLVMBuilderRef builder = ctx->builder; + + num = LLVMBuildMul(builder, + LLVMBuildZExt(builder, num, ctx->i64, ""), + LLVMBuildZExt(builder, multiplier, ctx->i64, ""), ""); /* both factors are zero-extended to i64 before the multiply */ + num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), ""); /* keep bits 32..63 of the product */ + num = LLVMBuildTrunc(builder, num, ctx->i32, ""); /* narrow back to i32 */ + return LLVMBuildLShr(builder, num, post_shift, ""); +} + /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is * already multiplied by two. id is the cube face number. diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 5c3e221b2cc..f68efbc49ff 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -182,6 +182,23 @@ ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den); +LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef multiplier, + LLVMValueRef pre_shift, + LLVMValueRef post_shift, + LLVMValueRef increment); +LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef multiplier, + LLVMValueRef pre_shift, + LLVMValueRef post_shift, + LLVMValueRef increment); +LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef multiplier, + LLVMValueRef post_shift); + void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod, |