about | summary | refs | log | tree | commit | diff | stats
path: root/src/amd/llvm
diff options
context:
space:
mode:
author: Samuel Pitoiset <[email protected]> 2020-03-26 11:40:35 +0100
committer: Samuel Pitoiset <[email protected]> 2020-03-27 08:05:43 +0100
commit: ba2ec1f369d2c97fc7c54ecd52b0addcfd349a31 (patch)
tree: 64f5769e59c1c45e62e724391ff65d56024d7847 /src/amd/llvm
parent: d548384fc686f4e9cc9e6551f9a582cc740f3233 (diff)
ac/nir: use llvm.amdgcn.rcp in ac_build_fdiv()
Instead of emitting 1.0 / x, which includes a slow division that LLVM doesn't always optimize even if the metadata is correctly set.

No pipeline-db changes with VEGA10/LLVM 9.

pipeline-db (VEGA10/LLVM 10):
Totals from affected shaders:
SGPRS: 6672 -> 6672 (0.00 %)
VGPRS: 6652 -> 6652 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 561780 -> 561692 (-0.02 %) bytes
Max Waves: 1043 -> 1043 (0.00 %)

pipeline-db (VEGA10/LLVM 11 - 92744f62478):
Totals from affected shaders:
SGPRS: 84608 -> 83768 (-0.99 %)
VGPRS: 106768 -> 106636 (-0.12 %)
Spilled SGPRs: 1625 -> 1713 (5.42 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 10850936 -> 10726712 (-1.14 %) bytes
Max Waves: 3152 -> 3180 (0.89 %)

LLVM 11 (master) is more affected than previous versions, but based on the small impact with LLVM 9/10, I decided to emit it unconditionally.

Cc: 20.0 <[email protected]>
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4326>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4326>
Diffstat (limited to 'src/amd/llvm')
-rw-r--r-- src/amd/llvm/ac_llvm_build.c 30
-rw-r--r-- src/amd/llvm/ac_llvm_build.h 2
2 files changed, 12 insertions, 20 deletions
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index f13baf792fc..c2b5667f996 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -65,8 +65,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
enum ac_float_mode float_mode, unsigned wave_size,
unsigned ballot_mask_bits)
{
- LLVMValueRef args[1];
-
ctx->context = LLVMContextCreate();
ctx->chip_class = chip_class;
@@ -127,11 +125,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"invariant.load", 14);
- ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
- args[0] = LLVMConstReal(ctx->f32, 2.5);
- ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"amdgpu.uniform", 14);
@@ -707,17 +700,18 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
LLVMValueRef num,
LLVMValueRef den)
{
- /* If we do (num / den), LLVM >= 7.0 does:
- * return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
- *
- * If we do (num * (1 / den)), LLVM does:
- * return num * v_rcp_f32(den);
- */
- LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
- LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
- /* Use v_rcp_f32 instead of precise division. */
- if (!LLVMIsConstant(rcp))
- LLVMSetMetadata(rcp, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+ unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+ const char *name;
+
+ if (type_size == 2)
+ name = "llvm.amdgcn.rcp.f16";
+ else if (type_size == 4)
+ name = "llvm.amdgcn.rcp.f32";
+ else
+ name = "llvm.amdgcn.rcp.f64";
+
+ LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
+ &den, 1, AC_FUNC_ATTR_READNONE);
return LLVMBuildFMul(ctx->builder, num, rcp, "");
}
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 2e60cf0ada1..ff90092754e 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -123,8 +123,6 @@ struct ac_llvm_context {
unsigned range_md_kind;
unsigned invariant_load_md_kind;
unsigned uniform_md_kind;
- unsigned fpmath_md_kind;
- LLVMValueRef fpmath_md_2p5_ulp;
LLVMValueRef empty_md;
enum chip_class chip_class;