radv: do not expose GTT as device local memory mostly for APUs

On APUs, the memory is unified (all heaps are equally fast) and apps should count all memory heaps together. But some games like Id Tech games (Youngblood and such) don't manage memory correctly on APUs and they spill everything when one VRAM heap is full. Instead of spilling buffers, they should just allocate new buffers in the second heap but it seems like these games are confused if two memory heaps have the DEVICE_LOCAL_BIT set. This is probably a first step towards better memory management on APUs but there is still some work to do if we want to run most apps with a small dedicated VRAM (256MB or so). This gives a huge boost for Id Tech games on APUs, and doesn't seem to reduce Feral games performance. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4771>
author: Samuel Pitoiset <[email protected]> 2020-04-22 16:54:28 +0200
committer: Marge Bot <[email protected]> 2020-04-27 22:41:41 +0000
commit: 7a0a6a718035e1a754972fbbad8b91d19f39fa42 (patch)
tree: 82be50020d80bbdac58072ec771ced78337bb2b5 /src/amd
parent: 4a523baa00fcf12dabd2e7b054ce73ac238c11a7 (diff)
1 files changed, 30 insertions, 29 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c34674d0904..b590a92d4d5 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -180,6 +180,15 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 		};
 	}
+
+	if (device->rad_info.gart_size > 0) {
+		gart_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+			.size = device->rad_info.gart_size,
+			.flags = 0,
+		};
+	}
+
 	if (visible_vram_size) {
 		visible_vram_index = device->memory_properties.memoryHeapCount++;
 		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
@@ -187,24 +196,29 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 		};
 	}
-	if (device->rad_info.gart_size > 0) {
-		gart_index = device->memory_properties.memoryHeapCount++;
-		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
-			.size = device->rad_info.gart_size,
-			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-		};
-	}
 
 	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
 	unsigned type_count = 0;
-	if (vram_index >= 0) {
-		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
-		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
-			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-			.heapIndex = vram_index,
-		};
+
+	if (device->rad_info.has_dedicated_vram) {
+		if (vram_index >= 0) {
+			device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+			device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+				.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+				.heapIndex = vram_index,
+			};
+		}
+	} else {
+		if (visible_vram_index >= 0) {
+			device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+			device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+				.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+				.heapIndex = visible_vram_index,
+			};
+		}
 	}
-	if (gart_index >= 0 && device->rad_info.has_dedicated_vram) {
+
+	if (gart_index >= 0) {
 		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
@@ -221,26 +235,13 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
 			.heapIndex = visible_vram_index,
 		};
 	}
-	if (gart_index >= 0 && !device->rad_info.has_dedicated_vram) {
-		/* Put GTT after visible VRAM for GPUs without dedicated VRAM
-		 * as they have identical property flags, and according to the
-		 * spec, for types with identical flags, the one with greater
-		 * performance must be given a lower index. */
-		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
-		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
-			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-			.heapIndex = gart_index,
-		};
-	}
+
 	if (gart_index >= 0) {
 		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
 		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
 			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-			VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
-			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
 			.heapIndex = gart_index,
 		};
 	}
author	Samuel Pitoiset <[email protected]>	2020-04-22 16:54:28 +0200
committer	Marge Bot <[email protected]>	2020-04-27 22:41:41 +0000
commit	7a0a6a718035e1a754972fbbad8b91d19f39fa42 (patch)
tree	82be50020d80bbdac58072ec771ced78337bb2b5 /src/amd
parent	4a523baa00fcf12dabd2e7b054ce73ac238c11a7 (diff)