| author | Marek Olšák <[email protected]> | 2018-08-29 01:34:46 -0400 |
|---|---|---|
| committer | Marek Olšák <[email protected]> | 2018-09-10 15:19:56 -0400 |
| commit | be0bd95abf69920fc11fb50384ffc2f886a5f9e8 (patch) | |
| tree | 408da1bf4c15600ea54eeea11dd15febc98ccff4 | /src/amd |
| parent | fa595e3d0c47be1e91d8d24cce90c756fed23044 (diff) | |
radeonsi: fix GPU hangs with bindless textures and LLVM 7.0
Tested-by: Dieter Nützel <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 52 |
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 4 |
2 files changed, 51 insertions, 5 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1c8d944db74..1f5112e9929 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -835,6 +835,14 @@ ac_build_gep0(struct ac_llvm_context *ctx,
 	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 }
 
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				  LLVMValueRef index)
+{
+	return LLVMBuildPointerCast(ctx->builder,
+				    ac_build_gep0(ctx, ptr, index),
+				    LLVMTypeOf(ptr), "");
+}
+
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
 		       LLVMValueRef base_ptr, LLVMValueRef index,
@@ -853,14 +861,39 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
  * \param uniform Whether the base_ptr and index can be assumed to be
  *                dynamically uniform (i.e. load to an SGPR)
  * \param invariant Whether the load is invariant (no other opcodes affect it)
+ * \param no_unsigned_wraparound
+ *    For all possible re-associations and re-distributions of an expression
+ *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
+ *    without inbounds in base_ptr), this parameter is true if "addr + offset"
+ *    does not result in an unsigned integer wraparound. This is used for
+ *    optimal code generation of 32-bit pointer arithmetic.
+ *
+ *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
+ *    integer wraparound can't be an imm offset in s_load_dword, because
+ *    the instruction performs "addr + offset" in 64 bits.
+ *
+ *    Expected usage for bindless textures by chaining GEPs:
+ *      // possible unsigned wraparound, don't use InBounds:
+ *      ptr1 = LLVMBuildGEP(base_ptr, index);
+ *      image = load(ptr1); // becomes "s_load ptr1, 0"
+ *
+ *      ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
+ *      sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
  */
 static LLVMValueRef
 ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-		     LLVMValueRef index, bool uniform, bool invariant)
+		     LLVMValueRef index, bool uniform, bool invariant,
+		     bool no_unsigned_wraparound)
 {
 	LLVMValueRef pointer, result;
+	LLVMValueRef indices[2] = {ctx->i32_0, index};
+
+	if (no_unsigned_wraparound &&
+	    LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_CONST_32BIT_ADDR_SPACE)
+		pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 2, "");
+	else
+		pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 
-	pointer = ac_build_gep0(ctx, base_ptr, index);
 	if (uniform)
 		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
 	result = LLVMBuildLoad(ctx->builder, pointer, "");
@@ -872,19 +905,28 @@ ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
 			   LLVMValueRef index)
 {
-	return ac_build_load_custom(ctx, base_ptr, index, false, false);
+	return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
 }
 
 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
 				     LLVMValueRef base_ptr, LLVMValueRef index)
 {
-	return ac_build_load_custom(ctx, base_ptr, index, false, true);
+	return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
 }
 
+/* This assumes that there is no unsigned integer wraparound during the address
+ * computation, excluding all GEPs within base_ptr. */
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
 				   LLVMValueRef base_ptr, LLVMValueRef index)
 {
-	return ac_build_load_custom(ctx, base_ptr, index, true, true);
+	return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
+}
+
+/* See ac_build_load_custom() documentation. */
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+						   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index b080cca4cb7..0d261bae097 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -203,6 +203,8 @@
 LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx,
 			   LLVMValueRef base_ptr,
 			   LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				  LLVMValueRef index);
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
@@ -215,6 +217,8 @@ LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
 				     LLVMValueRef base_ptr, LLVMValueRef index);
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
 				   LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+						   LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
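As a rough illustration of the chained-GEP usage pattern documented in the ac_build_load_custom() comment above, a caller loading a bindless image/sampler descriptor pair might combine the new helpers as sketched below. This is a minimal sketch only, not part of the patch: the function name load_bindless_pair, the desc_list/handle arguments, and the assumption of a v8i32 descriptor list (32-byte elements, so the sampler sits one element past the image descriptor) are all placeholders.

```c
/* Minimal sketch, not from this patch: chain the new helpers following the
 * pattern described in ac_build_load_custom()'s doc comment. */
#include "ac_llvm_build.h"   /* ac_build_* helpers, struct ac_llvm_context */
#include <llvm-c/Core.h>     /* LLVMValueRef, LLVMConstInt */

static void load_bindless_pair(struct ac_llvm_context *ctx,
			       LLVMValueRef desc_list, /* 32-bit const addr space */
			       LLVMValueRef handle,    /* bindless handle (index) */
			       LLVMValueRef *image, LLVMValueRef *sampler)
{
	/* The handle arithmetic may wrap around in 32 bits, so this load must
	 * not claim InBounds: the *_uint_wraparound variant takes the plain
	 * LLVMBuildGEP path ("s_load ptr1, 0" in the comment above). */
	*image = ac_build_load_to_sgpr_uint_wraparound(ctx, desc_list, handle);

	/* Rebase the pointer once with the same non-InBounds add ... */
	LLVMValueRef ptr1 = ac_build_pointer_add(ctx, desc_list, handle);

	/* ... then load the sampler one element past the image descriptor.
	 * Relative to ptr1 there is no wraparound, so ac_build_load_to_sgpr's
	 * InBounds GEP lets LLVM fold the offset into the s_load immediate
	 * ("s_load ptr1, 32" for 32-byte elements). */
	*sampler = ac_build_load_to_sgpr(ctx, ptr1, LLVMConstInt(ctx->i32, 1, 0));
}
```

The point of the split, per the patch, is that only the second load may be marked InBounds: keeping InBounds off the potentially wrapping add prevents LLVM from folding a wrapping offset into the 64-bit s_load address computation, which is what the commit title associates with the GPU hangs.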