summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2019-09-09 10:29:22 +0200
committer Samuel Pitoiset <[email protected]> 2019-09-16 12:08:22 +0200
commit 5ebc76471c48b1831114db2558bdc33a7dc0be05 (patch)
tree 593c249f6ad82bd0542d69a648d9669fe060d6b6 /src/amd
parent e1dc3ab753480db414a68ef7944f00cfc75d5882 (diff)
radv/gfx10: adjust the GS NGG scratch size for streamout
It needs more space for multiple streams.

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/vulkan/radv_nir_to_llvm.c 6
-rw-r--r-- src/amd/vulkan/radv_shader.c 16
2 files changed, 19 insertions, 3 deletions
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index d9c91f0591b..5019fa301c1 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -4208,9 +4208,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
}
- /* TODO: streamout */
+ unsigned scratch_size = 8;
+ if (ctx.shader_info->so.num_outputs)
+ scratch_size = 44;
- LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, 8);
+ LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
ctx.gs_ngg_scratch =
LLVMAddGlobalInAddressSpace(ctx.ac.module,
ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c8dd54fae53..02a8712a972 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -900,9 +900,23 @@ radv_shader_variant_create(struct radv_device *device,
* size randomly hangs with CTS. Just use the maximum
* possible LDS size for now.
*/
+ unsigned ngg_scratch_size = 8 * 4;
+ if (binary->info.so.num_outputs) {
+ /* Memory layout of NGG streamout scratch:
+ * [0-3]: number of generated primitives
+ * [4-7]: number of emitted primitives
+ * [8-11]: streamout offsets
+ * [12:19]: primitive offsets for stream 0
+ * [20:27]: primitive offsets for stream 1
+ * [28:35]: primitive offsets for stream 2
+ * [36:43]: primitive offsets for stream 3
+ */
+ ngg_scratch_size = 44 * 4;
+ }
+
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring";
- sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - 32; /* 32 is NGG scratch */
+ sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - ngg_scratch_size;
sym->align = 64 * 1024;
}