diff options
author     Marek Olšák <[email protected]>    2019-05-24 18:48:39 -0400
committer  Marek Olšák <[email protected]>    2019-07-03 15:51:13 -0400
commit     4bdf44724fa8bd88d8f5ed2d8627a4b5ba02cbc0 (patch)
tree       068200aedd2e899cfc52485e91deabc760885fe4 /src/amd
parent     f81aa6b0c8117351aaf67dd9dfe5a68fe48ded0a (diff)
radeonsi/gfx10: set DLC for loads when GLC is set
This fixes L1 shader array cache coherency.
Acked-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r--   src/amd/common/ac_llvm_build.c  | 19
-rw-r--r--   src/amd/common/ac_llvm_build.h  |  7
-rw-r--r--   src/amd/common/ac_nir_to_llvm.c | 12
3 files changed, 26 insertions, 12 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index ecb72395867..5089463e2db 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1107,6 +1107,15 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
 	return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
+static LLVMValueRef get_cache_policy(struct ac_llvm_context *ctx,
+				     bool load, bool glc, bool slc)
+{
+	return LLVMConstInt(ctx->i32,
+			    (glc ? ac_glc : 0) +
+			    (slc ? ac_slc : 0) +
+			    (ctx->chip_class >= GFX10 && glc && load ? ac_dlc : 0), 0);
+}
+
 static void
 ac_build_llvm7_buffer_store_common(struct ac_llvm_context *ctx,
 				   LLVMValueRef rsrc,
@@ -1165,7 +1174,7 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
 	args[idx++] = vindex ? vindex : ctx->i32_0;
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
-	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	args[idx++] = get_cache_policy(ctx, false, glc, slc);
 	unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
@@ -1350,7 +1359,7 @@ ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
 	args[idx++] = vindex ? vindex : ctx->i32_0;
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
-	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	args[idx++] = get_cache_policy(ctx, true, glc, slc);
 	unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
@@ -1404,6 +1413,8 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
 			HAVE_LLVM >= 0x0800 ?
 			"llvm.amdgcn.s.buffer.load.f32" : "llvm.SI.load.const.v4i32";
 		unsigned num_args = HAVE_LLVM >= 0x0800 ? 3 : 2;
+		/* TODO: set glc+dlc on GFX10 (LLVM support is missing) */
+		assert(!glc || ctx->chip_class < GFX10);
 		LLVMValueRef args[3] = {
 			rsrc,
 			offset,
@@ -1551,7 +1562,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx, dfmt, nfmt), 0);
-	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	args[idx++] = get_cache_policy(ctx, true, glc, slc);
 	unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
@@ -2049,7 +2060,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
 	args[idx++] = voffset ? voffset : ctx->i32_0;
 	args[idx++] = soffset ? soffset : ctx->i32_0;
 	args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx, dfmt, nfmt), 0);
-	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	args[idx++] = get_cache_policy(ctx, false, glc, slc);
 	unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
 	const char *indexing_kind = structurized ? "struct" : "raw";
 	char name[256], type_name[8];
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 17e701b21f8..4917315cc50 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -527,8 +527,9 @@ enum ac_image_dim {
 /* These cache policy bits match the definitions used by the LLVM intrinsics.
  */
 enum ac_image_cache_policy {
-	ac_glc = 1 << 0,
-	ac_slc = 1 << 1,
+	ac_glc = 1 << 0, /* per-CU cache control */
+	ac_slc = 1 << 1, /* global L2 cache control */
+	ac_dlc = 1 << 2, /* per-shader-array cache control */
 };
 
 struct ac_image_args {
@@ -536,7 +537,7 @@ struct ac_image_args {
 	enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
 	enum ac_image_dim dim : 3;
 	unsigned dmask : 4;
-	unsigned cache_policy : 2;
+	unsigned cache_policy : 3;
 	bool unorm : 1;
 	bool level_zero : 1;
 	unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 636fd4035c8..73941ba6f45 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1519,6 +1519,7 @@ static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueR
 
 static unsigned get_cache_policy(struct ac_nir_context *ctx,
 				 enum gl_access_qualifier access,
+				 bool load,
 				 bool may_store_unaligned,
 				 bool writeonly_memory)
 {
@@ -1535,7 +1536,8 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx,
 	     */
 	    writeonly_memory ||
 	    access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
-		cache_policy |= ac_glc;
+		cache_policy |= ac_glc |
+				(ctx->ac.chip_class >= GFX10 && load ? ac_dlc : 0);
 	}
 
 	return cache_policy;
@@ -1549,7 +1551,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 	unsigned writemask = nir_intrinsic_write_mask(instr);
 	enum gl_access_qualifier access = nir_intrinsic_access(instr);
 	bool writeonly_memory = access & ACCESS_NON_READABLE;
-	unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
+	unsigned cache_policy = get_cache_policy(ctx, access, false, false, writeonly_memory);
 
 	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
 					        get_src(ctx, instr->src[1]), true);
@@ -1713,7 +1715,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
 	int elem_size_bytes = instr->dest.ssa.bit_size / 8;
 	int num_components = instr->num_components;
 	enum gl_access_qualifier access = nir_intrinsic_access(instr);
-	unsigned cache_policy = get_cache_policy(ctx, access, false, false);
+	unsigned cache_policy = get_cache_policy(ctx, access, true, false, false);
 
 	LLVMValueRef offset = get_src(ctx, instr->src[1]);
 	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
@@ -2452,7 +2454,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 
 	struct ac_image_args args = {};
 
-	args.cache_policy = get_cache_policy(ctx, access, false, false);
+	args.cache_policy = get_cache_policy(ctx, access, true, false, false);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
 		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
@@ -2510,7 +2512,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
 	bool writeonly_memory = access & ACCESS_NON_READABLE;
 	struct ac_image_args args = {};
 
-	args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
+	args.cache_policy = get_cache_policy(ctx, access, false, true, writeonly_memory);
 
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
 		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);