From 9851c8285f7bf70a6cb4bede2ee94110c14acc19 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:37:59 -0800 Subject: Move the intel vulkan driver to src/intel/vulkan --- src/intel/vulkan/.gitignore | 9 + src/intel/vulkan/Makefile.am | 203 +++ src/intel/vulkan/anv_allocator.c | 862 +++++++++ src/intel/vulkan/anv_batch_chain.c | 1077 +++++++++++ src/intel/vulkan/anv_cmd_buffer.c | 1191 +++++++++++++ src/intel/vulkan/anv_descriptor_set.c | 532 ++++++ src/intel/vulkan/anv_device.c | 1789 +++++++++++++++++++ src/intel/vulkan/anv_dump.c | 209 +++ src/intel/vulkan/anv_entrypoints_gen.py | 324 ++++ src/intel/vulkan/anv_formats.c | 603 +++++++ src/intel/vulkan/anv_gem.c | 358 ++++ src/intel/vulkan/anv_gem_stubs.c | 159 ++ src/intel/vulkan/anv_gen_macros.h | 146 ++ src/intel/vulkan/anv_image.c | 911 ++++++++++ src/intel/vulkan/anv_intel.c | 100 ++ src/intel/vulkan/anv_meta.c | 169 ++ src/intel/vulkan/anv_meta.h | 75 + src/intel/vulkan/anv_meta_blit.c | 1442 +++++++++++++++ src/intel/vulkan/anv_meta_clear.c | 1098 ++++++++++++ src/intel/vulkan/anv_meta_resolve.c | 867 +++++++++ src/intel/vulkan/anv_nir.h | 44 + src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 171 ++ src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 394 ++++ src/intel/vulkan/anv_nir_lower_push_constants.c | 77 + src/intel/vulkan/anv_pass.c | 160 ++ src/intel/vulkan/anv_pipeline.c | 1278 +++++++++++++ src/intel/vulkan/anv_pipeline_cache.c | 405 +++++ src/intel/vulkan/anv_private.h | 1876 ++++++++++++++++++++ src/intel/vulkan/anv_query.c | 187 ++ src/intel/vulkan/anv_util.c | 195 ++ src/intel/vulkan/anv_wsi.c | 196 ++ src/intel/vulkan/anv_wsi.h | 74 + src/intel/vulkan/anv_wsi_wayland.c | 871 +++++++++ src/intel/vulkan/anv_wsi_x11.c | 758 ++++++++ src/intel/vulkan/dev_icd.json.in | 7 + src/intel/vulkan/gen7_cmd_buffer.c | 589 ++++++ src/intel/vulkan/gen7_pipeline.c | 410 +++++ src/intel/vulkan/gen7_state.c | 264 +++ src/intel/vulkan/gen8_cmd_buffer.c | 914 ++++++++++ src/intel/vulkan/gen8_pipeline.c | 573 ++++++ src/intel/vulkan/gen8_state.c | 493 +++++ src/intel/vulkan/genX_cmd_buffer.c | 717 ++++++++ src/intel/vulkan/genX_pipeline.c | 126 ++ src/intel/vulkan/genX_pipeline_util.h | 327 ++++ src/intel/vulkan/genX_state_util.h | 112 ++ src/intel/vulkan/intel_icd.json.in | 7 + src/intel/vulkan/tests/.gitignore | 5 + src/intel/vulkan/tests/Makefile.am | 46 + src/intel/vulkan/tests/block_pool_no_free.c | 144 ++ src/intel/vulkan/tests/state_pool.c | 57 + src/intel/vulkan/tests/state_pool_free_list_only.c | 66 + src/intel/vulkan/tests/state_pool_no_free.c | 117 ++ src/intel/vulkan/tests/state_pool_test_helper.h | 71 + 53 files changed, 23855 insertions(+) create mode 100644 src/intel/vulkan/.gitignore create mode 100644 src/intel/vulkan/Makefile.am create mode 100644 src/intel/vulkan/anv_allocator.c create mode 100644 src/intel/vulkan/anv_batch_chain.c create mode 100644 src/intel/vulkan/anv_cmd_buffer.c create mode 100644 src/intel/vulkan/anv_descriptor_set.c create mode 100644 src/intel/vulkan/anv_device.c create mode 100644 src/intel/vulkan/anv_dump.c create mode 100644 src/intel/vulkan/anv_entrypoints_gen.py create mode 100644 src/intel/vulkan/anv_formats.c create mode 100644 src/intel/vulkan/anv_gem.c create mode 100644 src/intel/vulkan/anv_gem_stubs.c create mode 100644 src/intel/vulkan/anv_gen_macros.h create mode 100644 src/intel/vulkan/anv_image.c create mode 100644 src/intel/vulkan/anv_intel.c create mode 100644 src/intel/vulkan/anv_meta.c create mode 100644 src/intel/vulkan/anv_meta.h create mode 100644 
src/intel/vulkan/anv_meta_blit.c create mode 100644 src/intel/vulkan/anv_meta_clear.c create mode 100644 src/intel/vulkan/anv_meta_resolve.c create mode 100644 src/intel/vulkan/anv_nir.h create mode 100644 src/intel/vulkan/anv_nir_apply_dynamic_offsets.c create mode 100644 src/intel/vulkan/anv_nir_apply_pipeline_layout.c create mode 100644 src/intel/vulkan/anv_nir_lower_push_constants.c create mode 100644 src/intel/vulkan/anv_pass.c create mode 100644 src/intel/vulkan/anv_pipeline.c create mode 100644 src/intel/vulkan/anv_pipeline_cache.c create mode 100644 src/intel/vulkan/anv_private.h create mode 100644 src/intel/vulkan/anv_query.c create mode 100644 src/intel/vulkan/anv_util.c create mode 100644 src/intel/vulkan/anv_wsi.c create mode 100644 src/intel/vulkan/anv_wsi.h create mode 100644 src/intel/vulkan/anv_wsi_wayland.c create mode 100644 src/intel/vulkan/anv_wsi_x11.c create mode 100644 src/intel/vulkan/dev_icd.json.in create mode 100644 src/intel/vulkan/gen7_cmd_buffer.c create mode 100644 src/intel/vulkan/gen7_pipeline.c create mode 100644 src/intel/vulkan/gen7_state.c create mode 100644 src/intel/vulkan/gen8_cmd_buffer.c create mode 100644 src/intel/vulkan/gen8_pipeline.c create mode 100644 src/intel/vulkan/gen8_state.c create mode 100644 src/intel/vulkan/genX_cmd_buffer.c create mode 100644 src/intel/vulkan/genX_pipeline.c create mode 100644 src/intel/vulkan/genX_pipeline_util.h create mode 100644 src/intel/vulkan/genX_state_util.h create mode 100644 src/intel/vulkan/intel_icd.json.in create mode 100644 src/intel/vulkan/tests/.gitignore create mode 100644 src/intel/vulkan/tests/Makefile.am create mode 100644 src/intel/vulkan/tests/block_pool_no_free.c create mode 100644 src/intel/vulkan/tests/state_pool.c create mode 100644 src/intel/vulkan/tests/state_pool_free_list_only.c create mode 100644 src/intel/vulkan/tests/state_pool_no_free.c create mode 100644 src/intel/vulkan/tests/state_pool_test_helper.h (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore new file mode 100644 index 00000000000..40afc2e3989 --- /dev/null +++ b/src/intel/vulkan/.gitignore @@ -0,0 +1,9 @@ +# Generated source files +/*_spirv_autogen.h +/anv_entrypoints.c +/anv_entrypoints.h +/wayland-drm-protocol.c +/wayland-drm-client-protocol.h +/dev_icd.json +/intel_icd.json +/gen*_pack.h \ No newline at end of file diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am new file mode 100644 index 00000000000..2144e5a691a --- /dev/null +++ b/src/intel/vulkan/Makefile.am @@ -0,0 +1,203 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h + +# Used when generating entrypoints to filter out unwanted extensions +VULKAN_ENTRYPOINT_CPPFLAGS = \ + -I$(top_srcdir)/include/vulkan \ + -DVK_USE_PLATFORM_XCB_KHR \ + -DVK_USE_PLATFORM_WAYLAND_KHR + +lib_LTLIBRARIES = libvulkan_intel.la + +check_LTLIBRARIES = libvulkan-test.la + +PER_GEN_LIBS = \ + libanv-gen7.la \ + libanv-gen75.la \ + libanv-gen8.la \ + libanv-gen9.la + +noinst_LTLIBRARIES = $(PER_GEN_LIBS) + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/compiler \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/intel/ \ + -I$(top_builddir)/src \ + -I$(top_builddir)/src/compiler \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/vulkan + +libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_descriptor_set.c \ + anv_device.c \ + anv_dump.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_meta_blit.c \ + anv_meta_clear.c \ + anv_meta_resolve.c \ + anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ + anv_nir_lower_push_constants.c \ + anv_pass.c \ + anv_pipeline.c \ + anv_pipeline_cache.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_wsi.c \ + anv_wsi_x11.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c + +libanv_gen7_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 + +libanv_gen75_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 + +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 + +libanv_gen9_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 + +if HAVE_EGL_PLATFORM_WAYLAND +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c +libvulkan_intel_la_CFLAGS += 
-DHAVE_WAYLAND_PLATFORM +endif + +libvulkan_intel_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ + +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/intel/isl/libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ + $(top_builddir)/src/mesa/libmesa.la \ + $(top_builddir)/src/mesa/drivers/dri/common/libdri_test_stubs.la \ + -lpthread -ldl -lstdc++ \ + $(PER_GEN_LIBS) + +libvulkan_intel_la_LDFLAGS = \ + -module -avoid-version -shared -shrext .so + + +# Generate icd files. It would be nice to just be able to add these to +# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64', +# which we can't put in the icd file. When running sed from the Makefile we +# can use ${libdir}, which expands completely and we avoid putting Makefile +# variables in the icd file. + +icdconfdir=$(sysconfdir)/vulkan/icd.d +icdconf_DATA = intel_icd.json +noinst_DATA = dev_icd.json + +%.json : %.json.in + $(AM_V_GEN) $(SED) \ + -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ + -e "s#@install_libdir@#${libdir}#" < $< > $@ + + +# Libvulkan with dummy gem. Used for unit tests. + +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c new file mode 100644 index 00000000000..a7ae975656b --- /dev/null +++ b/src/intel/vulkan/anv_allocator.c @@ -0,0 +1,862 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <limits.h> +#include <assert.h> +#include <linux/futex.h> +#include <linux/memfd.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "anv_private.h" + +#ifdef HAVE_VALGRIND +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offsets of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It shouldn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these objects and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similarly to how the i965 DRI driver + * streams all its state. Even with Vulkan, we need to emit transient state + * (whether surface state base or dynamic state base), and for that we can just + * get a block and fill it up. These cases are local to a command buffer and + * the sub-allocator need not be thread safe. The streaming allocator gets a + * new block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty.
*/ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. + */ + __sync_synchronize(); + + int32_t *next_ptr = *map + current.offset; + new.offset = VG_NOACCESS_READ(next_ptr); + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) +{ + union anv_free_list current, old, new; + int32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, current.offset); + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. 
+ */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, current, new); + } while (old != current); +} + +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(util_is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->bo.size = 0; + pool->block_size = block_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->back_free_list = ANV_FREE_LIST_EMPTY; + + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return; + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) + return; + + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + pool->state.next = 0; + pool->state.end = 0; + pool->back_state.next = 0; + pool->back_state.end = 0; + + /* Immediately grow the pool so we'll have a backing bo. */ + pool->state.end = anv_block_pool_grow(pool, &pool->state); +} + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + + close(pool->fd); +} + +#define PAGE_SIZE 4096 + +/** Grows and re-centers the block pool. + * + * We grow the block pool in one or both directions in such a way that the + * following conditions are met: + * + * 1) The size of the entire pool is always a power of two. + * + * 2) The pool only grows on both ends. Neither end can get + * shortened. + * + * 3) At the end of the allocation, we have about twice as much space + * allocated for each end as we have used. This way the pool doesn't + * grow too far in one direction or the other. + * + * 4) If the _alloc_back() has never been called, then the back portion of + * the pool retains a size of zero. (This makes it easier for users of + * the block pool that only want a one-sided pool.) + * + * 5) We have enough space allocated for at least one more block in + * whichever side `state` points to. 
+ * + * 6) The center of the pool is always aligned to both the block_size of + * the pool and a 4K CPU page. + */ +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) +{ + size_t size; + void *map; + uint32_t gem_handle; + struct anv_mmap_cleanup *cleanup; + + pthread_mutex_lock(&pool->device->mutex); + + assert(state == &pool->state || state == &pool->back_state); + + /* Gather a little usage information on the pool. Since we may have + * threads waiting in queue to get some storage while we resize, it's + * actually possible that total_used will be larger than old_size. In + * particular, block_pool_alloc() increments state->next prior to + * calling block_pool_grow, so this ensures that we get enough space for + * whichever side tries to grow the pool. + * + * We align to a page size because it makes it easier to do our + * calculations later in such a way that we stay page-aligned. + */ + uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); + uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); + uint32_t total_used = front_used + back_used; + + assert(state == &pool->state || back_used > 0); + + size_t old_size = pool->bo.size; + + if (old_size != 0 && + back_used * 2 <= pool->center_bo_offset && + front_used * 2 <= (old_size - pool->center_bo_offset)) { + /* If we're in this case then this isn't the first allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + + if (old_size == 0) { + /* This is the first allocation */ + size = MAX2(32 * pool->block_size, PAGE_SIZE); + } else { + size = old_size * 2; + } + + /* We can't have a block pool bigger than 1GB because we use signed + * 32-bit offsets in the free list and we don't want overflow. We + * should never need a block pool bigger than 1GB anyway. + */ + assert(size <= (1u << 31)); + + /* We compute a new center_bo_offset such that, when we double the size + * of the pool, we maintain the ratio of how much is used by each side. + * This way things should remain more-or-less balanced. + */ + uint32_t center_bo_offset; + if (back_used == 0) { + /* If we're in this case then we have never called alloc_back(). In + * this case, we want to keep the offset at 0 to make things as simple + * as possible for users that don't care about back allocations. + */ + center_bo_offset = 0; + } else { + /* Try to "center" the allocation based on how much is currently in + * use on each side of the center line.
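+ * + * A worked example with made-up numbers: doubling to size = 128 KiB with + * back_used = 16 KiB and total_used = 64 KiB gives + * center_bo_offset = 128 KiB * 16 / 64 = 32 KiB, i.e. the back side keeps + * its one-quarter share of the new pool, before the align-down and + * no-shrink clamps below.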
+ */ + center_bo_offset = ((uint64_t)size * back_used) / total_used; + + /* Align down to a multiple of both the block size and page size */ + uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); + assert(util_is_power_of_two(granularity)); + center_bo_offset &= ~(granularity - 1); + + assert(center_bo_offset >= back_used); + + /* Make sure we don't shrink the back end of the pool */ + if (center_bo_offset < pool->back_state.end) + center_bo_offset = pool->back_state.end; + + /* Make sure that we don't shrink the front end of the pool */ + if (size - center_bo_offset < pool->state.end) + center_bo_offset = size - pool->state.end; + } + + assert(center_bo_offset % pool->block_size == 0); + assert(center_bo_offset % PAGE_SIZE == 0); + + /* Assert that we only ever grow the pool */ + assert(center_bo_offset >= pool->back_state.end); + assert(size - center_bo_offset >= pool->state.end); + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + goto fail; + *cleanup = ANV_MMAP_CLEANUP_INIT; + + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps add up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + cleanup->map = map; + cleanup->size = size; + + if (map == MAP_FAILED) + goto fail; + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + goto fail; + cleanup->gem_handle = gem_handle; + +#if 0 + /* Regular objects are created I915_CACHING_CACHED on LLC platforms and + * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are + * always created as I915_CACHING_CACHED, which on non-LLC means + * snooped. That can be useful but comes with a bit of overhead. Since + * we're explicitly clflushing and don't want the overhead we need to turn + * it off. */ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); + anv_gem_set_domain(pool->device, gem_handle, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + } +#endif + + /* Now that we successfully allocated everything, we can write the new + * values back into pool. */ + pool->map = map + center_bo_offset; + pool->center_bo_offset = center_bo_offset; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + +done: + pthread_mutex_unlock(&pool->device->mutex); + + /* Return the appropriate new size. This function never actually + * updates state->next. Instead, we let the caller do that because it + * needs to do so in order to maintain its concurrency model. + */ + if (state == &pool->state) { + return pool->bo.size - pool->center_bo_offset; + } else { + assert(pool->center_bo_offset > 0); + return pool->center_bo_offset; + } + +fail: + pthread_mutex_unlock(&pool->device->mutex); + + return 0; +}
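The grow path above is exactly the memfd trick from the design notes at the top of the file: grow the backing file with ftruncate(), map a larger window, and deliberately leak the old mapping. A distilled, self-contained sketch of just that trick (plain Linux C with error checking omitted; illustrative only, not driver code):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
   /* memfd_create() via syscall(2), as the driver's wrapper does (the
    * driver passes MFD_CLOEXEC; 0 here for brevity). */
   int fd = syscall(SYS_memfd_create, "pool", 0);
   ftruncate(fd, 8192);

   char *old_map = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
                        MAP_SHARED, fd, 0);
   strcpy(old_map, "stable");

   ftruncate(fd, 16384);   /* grow the backing file... */
   char *new_map = mmap(NULL, 16384, PROT_READ | PROT_WRITE,
                        MAP_SHARED, fd, 0);

   /* ...and map a larger window: both views see the same bytes, and the
    * old mapping stays valid, so the driver can leak it until teardown
    * instead of racing concurrent allocators with munmap(). */
   printf("%s / %s\n", old_map, new_map);
   return 0;
}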
+ +static uint32_t +anv_block_pool_alloc_new(struct anv_block_pool *pool, + struct anv_block_state *pool_state) +{ + struct anv_block_state state, old, new; + + while (1) { + state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); + if (state.next < state.end) { + assert(pool->map); + return state.next; + } else if (state.next == state.end) { + /* We allocated the first block outside the pool, we have to grow it. + * pool_state->next acts as a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. */ + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, pool_state); + assert(new.end >= new.next && new.end % pool->block_size == 0); + old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); + if (old.next != state.next) + futex_wake(&pool_state->end, INT_MAX); + return state.next; + } else { + futex_wait(&pool_state->end, state.end); + continue; + } + } +} + +int32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(offset >= 0); + assert(pool->map); + return offset; + } + + return anv_block_pool_alloc_new(pool, &pool->state); +} + +/* Allocates a block out of the back of the block pool. + * + * This will allocate a block earlier than the "start" of the block pool. + * The offsets returned from this function will be negative but will still + * be correct relative to the block pool's map pointer. + * + * If you ever use anv_block_pool_alloc_back, then you will have to do + * gymnastics with the block pool's BO when doing relocations. + */ +int32_t +anv_block_pool_alloc_back(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { + assert(offset < 0); + assert(pool->map); + return offset; + } + + offset = anv_block_pool_alloc_new(pool, &pool->back_state); + + /* The offset we get out of anv_block_pool_alloc_new() is actually the + * number of bytes downwards from the middle to the end of the block. + * We need to turn it into a (negative) offset from the middle to the + * start of the block. + */ + assert(offset >= 0); + return -(offset + pool->block_size); +} + +void +anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) +{ + if (offset < 0) { + anv_free_list_push(&pool->back_free_list, pool->map, offset); + } else { + anv_free_list_push(&pool->free_list, pool->map, offset); + } +}
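Both anv_block_pool_alloc_new() above and the fixed-size state pool below rely on the same lock-free idiom: next and end are packed into one 64-bit word so that a single fetch-and-add reserves space and snapshots a consistent {next, end} pair. A minimal, self-contained illustration (assumes a little-endian target, as the driver's packing does; made-up pool sizes, not driver code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

union block_state {
   struct {
      uint32_t next;   /* low word: offset of the next free byte */
      uint32_t end;    /* high word: end of the usable range */
   };
   uint64_t u64;
};

int main(void)
{
   union block_state pool = { .next = 0, .end = 4096 };
   const uint32_t block_size = 1024;

   for (int i = 0; i < 4; i++) {
      /* Adding to u64 only touches the low (next) word as long as it
       * doesn't carry into the high word, so the returned snapshot is a
       * consistent pair. */
      union block_state state;
      state.u64 = __sync_fetch_and_add(&pool.u64, block_size);
      assert(state.next < state.end);   /* in bounds: allocation worked */
      printf("block %d at offset %u\n", i, state.next);
   }
   /* A fifth caller would see next == end and have to grow the pool,
    * while later callers (next > end) futex_wait() until it's done. */
   return 0;
}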
+ +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && util_is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + int32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { + assert(offset >= 0); + return offset; + } + + /* If the free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + offset = anv_block_pool_alloc(block_pool); + new.next = offset + pool->state_size; + new.end = offset + block_pool->block_size; + old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return offset; + } else { + futex_wait(&pool->block.end, block.end); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_state_pool_finish(struct anv_state_pool *pool) +{ + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(util_is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct anv_state_stream_block { + /* The next block */ + struct anv_state_stream_block *next; + + /* The offset into the block pool at which this block starts */ + uint32_t offset; + +#ifdef HAVE_VALGRIND + /* A pointer to the first user-allocated thing in this block. This is + * what valgrind sees as the start of the block. + */ + void *_vg_ptr; +#endif +}; + +/* The state stream allocator is a one-shot, single-threaded allocator for + * variable-sized blocks. We use it for allocating dynamic state. + */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->block = NULL; + + /* Ensure that next + whatever > end. This way the first call to + * state_stream_alloc fetches a new block.
+ */ + stream->next = 1; + stream->end = 0; + + VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + VG(const uint32_t block_size = stream->block_pool->block_size); + + struct anv_state_stream_block *next = stream->block; + while (next != NULL) { + VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); + struct anv_state_stream_block sb = VG_NOACCESS_READ(next); + VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); + anv_block_pool_free(stream->block_pool, sb.offset); + next = sb.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(stream)); +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct anv_state_stream_block *sb = stream->block; + + struct anv_state state; + + state.offset = align_u32(stream->next, alignment); + if (state.offset + size > stream->end) { + uint32_t block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; + + VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); + sb->next = stream->block; + sb->offset = block; + VG(sb->_vg_ptr = NULL); + VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); + + stream->block = sb; + stream->start = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + + state.offset = align_u32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + assert(state.offset > stream->start); + state.map = (void *)sb + (state.offset - stream->start); + state.alloc_size = size; + +#ifdef HAVE_VALGRIND + void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); + } else { + void *state_end = state.map + state.alloc_size; + /* This only updates the mempool. The newly allocated chunk is still + * marked as NOACCESS. 
*/ + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); + /* Mark the newly allocated chunk as undefined */ + VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); + } +#endif + + stream->next = state.offset + size; + + return state; +} + +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; + + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); + if (new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + *bo = new_bo; + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; + link->bo = *bo; + + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c new file mode 100644 index 00000000000..d24dd06d7eb --- /dev/null +++ b/src/intel/vulkan/anv_batch_chain.c @@ -0,0 +1,1077 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen8_pack.h" + +/** \file anv_batch_chain.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->reloc_bos == NULL) { + anv_free(alloc, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + return anv_reloc_list_init_clone(list, alloc, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_reloc_bos == NULL) { + anv_free(alloc, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + const uint32_t domain = +
target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0; + + anv_reloc_list_grow(list, alloc, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = domain; + entry->write_domain = domain; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, alloc, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->alloc, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->alloc, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, + const struct anv_batch_bo *other_bbo, + struct 
anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, + &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->last_ss_pool_bo_offset = 0; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_free(&cmd_buffer->pool->alloc, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, + struct anv_cmd_buffer *cmd_buffer, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo = NULL; + result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. 
+ */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_cmd_buffer + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + return (struct anv_address) { + .bo = &cmd_buffer->device->surface_state_block_pool.bo, + .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), + }; +} + +static void +emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + /* In gen8+ the address field grew to two dwords to accommodate 48 bit + * offsets. The high 16 bits are in the last dword, so we can use the gen8 + * version in either case, as long as we set the instruction length in the + * header accordingly. This means that we always emit three dwords here + * and all the padding and adjustment we do in this file works for all + * gens. + */ + + const uint32_t gen7_length = + GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; + const uint32_t gen8_length = + GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, + .DWordLength = cmd_buffer->device->info.gen < 8 ? + gen7_length : gen8_length, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { bo, offset }); +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we can be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go.
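+ * + * (The chaining command is always emitted in its three-dword gen8 form, + * per emit_batch_buffer_start() above, so the reserved tail restored here + * is GEN8_MI_BATCH_BUFFER_START_length * 4 = 12 bytes.)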
+ */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); + struct anv_state state; + + state.alloc_size = align_u32(entries * 4, 32); + + if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) + return (struct anv_state) { 0 }; + + state.offset = cmd_buffer->bt_next; + state.map = block_pool->map + *bt_block + state.offset; + + cmd_buffer->bt_next += state.alloc_size; + + assert(*bt_block < 0); + *state_offset = -(*bt_block); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) +{ + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + + int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); + if (offset == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *offset = anv_block_pool_alloc_back(block_pool); + cmd_buffer->bt_next = 0; + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + + result = anv_batch_bo_create(cmd_buffer, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_batch_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + + success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), + 8 * sizeof(int32_t)); + if (!success) + goto fail_seen_bbos; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, + &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto 
fail_bt_blocks; + + anv_cmd_buffer_new_binding_table_block(cmd_buffer); + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_bt_blocks: + anv_vector_finish(&cmd_buffer->bt_blocks); + fail_seen_bbos: + anv_vector_finish(&cmd_buffer->seen_bbos); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, cmd_buffer); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + int32_t *bt_block; + anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + anv_vector_finish(&cmd_buffer->bt_blocks); + + anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, cmd_buffer); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { + int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); + cmd_buffer->bt_next = 0; + + cmd_buffer->surface_relocs.num_relocs = 0; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* When we start a batch buffer, we subtract a certain amount of + * padding from the end to ensure that we always have room to emit a + * BATCH_BUFFER_START to chain to the next BO. We need to remove + * that padding before we end the batch; otherwise, we may end up + * with our BATCH_BUFFER_END in another BO. + */ + cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. 
+       * We determine this statically here so that this stays in sync with
+       * the actual ExecuteCommands implementation.
+       */
+      if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
+          (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
+         /* If the secondary has exactly one batch buffer in its list *and*
+          * that batch buffer is less than half of the maximum size, we're
+          * probably better off simply copying it into our batch.
+          */
+         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
+      } else if (!(cmd_buffer->usage_flags &
+                   VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
+         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;
+
+         /* When we chain, we need to add an MI_BATCH_BUFFER_START command
+          * with its relocation.  In order to handle this we'll increment
+          * here so we can unconditionally decrement right before adding the
+          * MI_BATCH_BUFFER_START command.
+          */
+         batch_bo->relocs.num_relocs++;
+         cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4;
+      } else {
+         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
+      }
+   }
+}
+
+static inline VkResult
+anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
+                             struct list_head *list)
+{
+   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
+      struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos);
+      if (bbo_ptr == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      *bbo_ptr = bbo;
+   }
+
+   return VK_SUCCESS;
+}
+
+void
+anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
+                             struct anv_cmd_buffer *secondary)
+{
+   switch (secondary->exec_mode) {
+   case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
+      anv_batch_emit_batch(&primary->batch, &secondary->batch);
+      anv_cmd_buffer_emit_state_base_address(primary);
+      break;
+   case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
+      struct anv_batch_bo *first_bbo =
+         list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
+      struct anv_batch_bo *last_bbo =
+         list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
+
+      emit_batch_buffer_start(primary, &first_bbo->bo, 0);
+
+      struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
+      assert(primary->batch.start == this_bbo->bo.map);
+      uint32_t offset = primary->batch.next - primary->batch.start;
+      const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;
+
+      /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so
+       * we can emit a new command and relocation for the current splice.
+       * In order to handle the initial-use case, we incremented next and
+       * num_relocs in end_batch_buffer() so we can always just subtract
+       * here.
+       */
+      last_bbo->relocs.num_relocs--;
+      secondary->batch.next -= inst_size;
+      emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
+      anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
+
+      /* After patching up the secondary buffer, we need to clflush the
+       * modified instruction in case we're on a !llc platform.  We use a
+       * little loop to handle the case where the instruction crosses a
+       * cache line boundary.
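+       *
+       * As an illustration, assuming the usual 64-byte cache line
+       * (CACHELINE_SIZE == 64, CACHELINE_MASK == 63) and the 12-byte,
+       * three-dword gen8 MI_BATCH_BUFFER_START, an instruction written at
+       * map offset 0x3c spans the lines at 0x00 and 0x40, so the loop
+       * below flushes twice:
+       *
+       *    p = 0x3c & ~63 = 0x00;   clflush(0x00);   p += 64;
+       *    clflush(0x40);           p += 64;   p >= batch.next, stop.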
+ */ + if (!primary->device->info.has_llc) { + void *inst = secondary->batch.next - inst_size; + void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); + __builtin_ia32_mfence(); + while (p < secondary->batch.next) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, + &secondary->surface_relocs, 0); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? + cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) { + anv_free(&cmd_buffer->pool->alloc, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = bo->is_winsys_bo ? EXEC_OBJECT_WRITE : 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. 
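+    *
+    * For reference, each element of relocs->relocs that we hand to the
+    * kernel here is a struct drm_i915_gem_relocation_entry, roughly (see
+    * include/uapi/drm/i915_drm.h):
+    *
+    *    struct drm_i915_gem_relocation_entry {
+    *       __u32 target_handle;    // handle (or LUT index) of target BO
+    *       __u32 delta;            // value added to the target's offset
+    *       __u64 offset;           // where in this BO to write the address
+    *       __u64 presumed_offset;  // where we think the target lives
+    *       __u32 read_domains;
+    *       __u32 write_domain;
+    *    };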
+    */
+   obj->relocation_count = relocs->num_relocs;
+   obj->relocs_ptr = (uintptr_t) relocs->relocs;
+
+   for (size_t i = 0; i < relocs->num_relocs; i++) {
+      /* A quick sanity check on relocations */
+      assert(relocs->relocs[i].offset < bo->size);
+      anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL);
+   }
+   }
+
+   return VK_SUCCESS;
+}
+
+static void
+anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
+                              struct anv_reloc_list *list)
+{
+   struct anv_bo *bo;
+
+   /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
+    * struct drm_i915_gem_exec_object2 against the BO's current offset and,
+    * if none of the BOs have moved, it will skip relocation processing
+    * altogether.  If I915_EXEC_NO_RELOC is not supported, the kernel
+    * ignores the incoming value of offset so we can set it either way.
+    * For that to work we need to make sure all relocs use the same
+    * presumed offset.
+    */
+
+   for (size_t i = 0; i < list->num_relocs; i++) {
+      bo = list->reloc_bos[i];
+      if (bo->offset != list->relocs[i].presumed_offset)
+         cmd_buffer->execbuf2.need_reloc = true;
+
+      list->relocs[i].target_handle = bo->index;
+   }
+}
+
+static uint64_t
+read_reloc(const struct anv_device *device, const void *p)
+{
+   if (device->info.gen >= 8)
+      return *(uint64_t *)p;
+   else
+      return *(uint32_t *)p;
+}
+
+static void
+write_reloc(const struct anv_device *device, void *p, uint64_t v)
+{
+   if (device->info.gen >= 8)
+      *(uint64_t *)p = v;
+   else
+      *(uint32_t *)p = v;
+}
+
+static void
+adjust_relocations_from_block_pool(struct anv_block_pool *pool,
+                                   struct anv_reloc_list *relocs)
+{
+   for (size_t i = 0; i < relocs->num_relocs; i++) {
+      /* In general, we don't know how stale the relocated value is.  It
+       * may have been used last time or it may not.  Since we don't want
+       * to stomp it while the GPU may be accessing it, we haven't updated
+       * it anywhere else in the code.  Instead, we just set the presumed
+       * offset to what it is now based on the delta and the data in the
+       * block pool.  Then the kernel will update it for us if needed.
+       */
+      assert(relocs->relocs[i].offset < pool->state.end);
+      const void *p = pool->map + relocs->relocs[i].offset;
+
+      /* We're reading back the relocated value from potentially incoherent
+       * memory here.  However, any change to the value will be from the
+       * kernel writing out relocations, which will keep the CPU cache up
+       * to date.
+       */
+      relocs->relocs[i].presumed_offset =
+         read_reloc(pool->device, p) - relocs->relocs[i].delta;
+
+      /* All of the relocations from this block pool to other BOs should
+       * have been emitted relative to the surface block pool center.  We
+       * need to add the center offset to make them relative to the
+       * beginning of the actual GEM bo.
+       */
+      relocs->relocs[i].offset += pool->center_bo_offset;
+   }
+}
+
+static void
+adjust_relocations_to_block_pool(struct anv_block_pool *pool,
+                                 struct anv_bo *from_bo,
+                                 struct anv_reloc_list *relocs,
+                                 uint32_t *last_pool_center_bo_offset)
+{
+   assert(*last_pool_center_bo_offset <= pool->center_bo_offset);
+   uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset;
+
+   /* When we initially emit relocations into a block pool, we don't
+    * actually know what the final center_bo_offset will be so we just
+    * emit it as if center_bo_offset == 0.  Now that we know what the
+    * center offset is, we need to walk the list of relocations and adjust
+    * any relocations that point to the pool bo with the correct offset.
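+    *
+    * For example, a reloc emitted into the back half of the pool while the
+    * center was still assumed to sit at BO offset 0 might carry
+    * delta == -64, stored as 0xffffffc0.  If the pool has since grown so
+    * that center_bo_offset == 4096, adding the 4096 delta below leaves
+    * delta == 4032, which is that state's real offset from the start of
+    * the pool's GEM BO.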
+ */ + for (size_t i = 0; i < relocs->num_relocs; i++) { + if (relocs->reloc_bos[i] == &pool->bo) { + /* Adjust the delta value in the relocation to correctly + * correspond to the new delta. Initially, this value may have + * been negative (if treated as unsigned), but we trust in + * uint32_t roll-over to fix that for us at this point. + */ + relocs->relocs[i].delta += delta; + + /* Since the delta has changed, we need to update the actual + * relocated value with the new presumed value. This function + * should only be called on batch buffers, so we know it isn't in + * use by the GPU at the moment. + */ + assert(relocs->relocs[i].offset < from_bo->size); + write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, + relocs->relocs[i].presumed_offset + + relocs->relocs[i].delta); + } + } + + *last_pool_center_bo_offset = pool->center_bo_offset; +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_block_pool *ss_pool = + &cmd_buffer->device->surface_state_block_pool; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + &(*bbo)->last_ss_pool_bo_offset); + + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + } + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. + */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; + cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; + cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; + first_batch_bo->bo.index = last_idx; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. 
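+    *
+    * Concretely, because we submit with I915_EXEC_HANDLE_LUT, each reloc's
+    * target_handle is an index into the execbuf object array rather than a
+    * raw GEM handle.  If the swap above moved the first batch BO from,
+    * say, index 2 to index 5, a reloc already carrying target_handle == 2
+    * would now name the wrong object, so every target_handle is
+    * recomputed from bo->index only once the ordering is final.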
+ */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + + if (!cmd_buffer->device->info.has_llc) { + __builtin_ia32_mfence(); + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo.map + i); + } + } + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | + I915_EXEC_CONSTANTS_REL_GENERAL, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c new file mode 100644 index 00000000000..b060828cf61 --- /dev/null +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -0,0 +1,1191 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. + */ + +/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + +static void +anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + + memset(&state->descriptors, 0, sizeof(state->descriptors)); + memset(&state->push_constants, 0, sizeof(state->push_constants)); + memset(state->binding_tables, 0, sizeof(state->binding_tables)); + memset(state->samplers, 0, sizeof(state->samplers)); + + /* 0 isn't a valid config. This ensures that we always configure L3$. */ + cmd_buffer->state.current_l3_config = 0; + + state->dirty = ~0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->push_constants_dirty = 0; + state->pipeline = NULL; + state->restart_index = UINT32_MAX; + state->dynamic = default_dynamic_state; + state->need_query_wa = true; + + if (state->attachments != NULL) { + anv_free(&cmd_buffer->pool->alloc, state->attachments); + state->attachments = NULL; + } + + state->gen7.index_buffer = NULL; +} + +/** + * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. 
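+ *
+ * For each attachment this records which aspects still need to be
+ * cleared.  For example, a combined depth/stencil attachment whose
+ * load_op is VK_ATTACHMENT_LOAD_OP_CLEAR but whose stencil_load_op is
+ * VK_ATTACHMENT_LOAD_OP_LOAD ends up with
+ *
+ *    pending_clear_aspects == VK_IMAGE_ASPECT_DEPTH_BIT
+ *
+ * so that only the depth aspect is cleared when the render pass begins.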
+ */
+void
+anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
+                                const VkRenderPassBeginInfo *info)
+{
+   struct anv_cmd_state *state = &cmd_buffer->state;
+   ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass);
+
+   anv_free(&cmd_buffer->pool->alloc, state->attachments);
+
+   if (pass->attachment_count == 0) {
+      state->attachments = NULL;
+      return;
+   }
+
+   state->attachments = anv_alloc(&cmd_buffer->pool->alloc,
+                                  pass->attachment_count *
+                                     sizeof(state->attachments[0]),
+                                  8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (state->attachments == NULL) {
+      /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
+      abort();
+   }
+
+   for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+      struct anv_render_pass_attachment *att = &pass->attachments[i];
+      VkImageAspectFlags clear_aspects = 0;
+
+      if (anv_format_is_color(att->format)) {
+         /* color attachment */
+         if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+            clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+         }
+      } else {
+         /* depthstencil attachment */
+         if (att->format->has_depth &&
+             att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+            clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+         }
+         if (att->format->has_stencil &&
+             att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+            clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+         }
+      }
+
+      state->attachments[i].pending_clear_aspects = clear_aspects;
+      if (clear_aspects) {
+         assert(info->clearValueCount > i);
+         state->attachments[i].clear_value = info->pClearValues[i];
+      }
+   }
+}
+
+static VkResult
+anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
+                                          gl_shader_stage stage, uint32_t size)
+{
+   struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage];
+
+   if (*ptr == NULL) {
+      *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (*ptr == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   } else if ((*ptr)->size < size) {
+      *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8,
+                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (*ptr == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+   (*ptr)->size = size;
+
+   return VK_SUCCESS;
+}
+
+#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
+   anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \
+      (offsetof(struct anv_push_constants, field) + \
+       sizeof(cmd_buffer->state.push_constants[0]->field)))
+
+static VkResult anv_create_cmd_buffer(
+    struct anv_device *                         device,
+    struct anv_cmd_pool *                       pool,
+    VkCommandBufferLevel                        level,
+    VkCommandBuffer*                            pCommandBuffer)
+{
+   struct anv_cmd_buffer *cmd_buffer;
+   VkResult result;
+
+   cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (cmd_buffer == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
+   cmd_buffer->device = device;
+   cmd_buffer->pool = pool;
+   cmd_buffer->level = level;
+   cmd_buffer->state.attachments = NULL;
+
+   result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   anv_state_stream_init(&cmd_buffer->surface_state_stream,
+                         &device->surface_state_block_pool);
+   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
+                         &device->dynamic_state_block_pool);
+
+   if (pool) {
+      list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+   } else {
+      /* Init the pool_link so we can safely call list_del when we destroy
+       * the command buffer.
+       */
+      list_inithead(&cmd_buffer->pool_link);
+   }
+
+   *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
+
+   return VK_SUCCESS;
+
+ fail:
+   anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
+
+   return result;
+}
+
+VkResult anv_AllocateCommandBuffers(
+    VkDevice                                    _device,
+    const VkCommandBufferAllocateInfo*          pAllocateInfo,
+    VkCommandBuffer*                            pCommandBuffers)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool);
+
+   VkResult result = VK_SUCCESS;
+   uint32_t i;
+
+   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+      result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level,
+                                     &pCommandBuffers[i]);
+      if (result != VK_SUCCESS)
+         break;
+   }
+
+   if (result != VK_SUCCESS)
+      anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
+                             i, pCommandBuffers);
+
+   return result;
+}
+
+static void
+anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer)
+{
+   list_del(&cmd_buffer->pool_link);
+
+   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
+
+   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
+   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
+
+   anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+   anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
+}
+
+void anv_FreeCommandBuffers(
+    VkDevice                                    device,
+    VkCommandPool                               commandPool,
+    uint32_t                                    commandBufferCount,
+    const VkCommandBuffer*                      pCommandBuffers)
+{
+   for (uint32_t i = 0; i < commandBufferCount; i++) {
+      ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
+
+      anv_cmd_buffer_destroy(cmd_buffer);
+   }
+}
+
+VkResult anv_ResetCommandBuffer(
+    VkCommandBuffer                             commandBuffer,
+    VkCommandBufferResetFlags                   flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->usage_flags = 0;
+   cmd_buffer->state.current_pipeline = UINT32_MAX;
+   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
+   anv_cmd_state_reset(cmd_buffer);
+
+   return VK_SUCCESS;
+}
+
+void
+anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
+{
+   switch (cmd_buffer->device->info.gen) {
+   case 7:
+      if (cmd_buffer->device->info.is_haswell)
+         return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
+      else
+         return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+   case 8:
+      return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
+   case 9:
+      return gen9_cmd_buffer_emit_state_base_address(cmd_buffer);
+   default:
+      unreachable("unsupported gen\n");
+   }
+}
+
+VkResult anv_BeginCommandBuffer(
+    VkCommandBuffer                             commandBuffer,
+    const VkCommandBufferBeginInfo*             pBeginInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   /* If this is the first vkBeginCommandBuffer, we must *initialize* the
+    * command buffer's state.  Otherwise, we must *reset* its state.  In
+    * both cases we reset it.
+    *
+    * From the Vulkan 1.0 spec:
+    *
+    *    If a command buffer is in the executable state and the command
+    *    buffer was allocated from a command pool with the
+    *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
+    *    vkBeginCommandBuffer implicitly resets the command buffer,
+    *    behaving as if vkResetCommandBuffer had been called with
+    *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set.  It then
+    *    puts the command buffer in the recording state.
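+    *
+    * So, for a pool created with
+    * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, an application may
+    * validly re-record the same command buffer in a loop with no explicit
+    * reset in between; a sketch of that pattern:
+    *
+    *    vkBeginCommandBuffer(cmd, &begin_info);   // implicit reset
+    *    ... record commands ...
+    *    vkEndCommandBuffer(cmd);
+    *    vkQueueSubmit(queue, 1, &submit, fence);  // wait on fence, reuse
+    *
+    * which is why the first thing we do below is reset unconditionally.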
+    */
+   anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0);
+
+   cmd_buffer->usage_flags = pBeginInfo->flags;
+
+   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
+          !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT));
+
+   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+
+   if (cmd_buffer->usage_flags &
+       VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+      cmd_buffer->state.framebuffer =
+         anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
+      cmd_buffer->state.pass =
+         anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
+
+      struct anv_subpass *subpass =
+         &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+
+      anv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult anv_EndCommandBuffer(
+    VkCommandBuffer                             commandBuffer)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct anv_device *device = cmd_buffer->device;
+
+   anv_cmd_buffer_end_batch_buffer(cmd_buffer);
+
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+      /* The algorithm used to compute the validate list is not thread-safe
+       * as it uses the bo->index field.  We have to lock the device around
+       * it.  Fortunately, the chances for contention here are probably
+       * very low.
+       */
+      pthread_mutex_lock(&device->mutex);
+      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
+      pthread_mutex_unlock(&device->mutex);
+   }
+
+   return VK_SUCCESS;
+}
+
+void anv_CmdBindPipeline(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineBindPoint                         pipelineBindPoint,
+    VkPipeline                                  _pipeline)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      cmd_buffer->state.compute_pipeline = pipeline;
+      cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE;
+      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+      cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+      break;
+
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+      cmd_buffer->state.pipeline = pipeline;
+      cmd_buffer->state.vb_dirty |= pipeline->vb_used;
+      cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
+      cmd_buffer->state.push_constants_dirty |= pipeline->active_stages;
+      cmd_buffer->state.descriptors_dirty |= pipeline->active_stages;
+
+      /* Apply the dynamic state from the pipeline */
+      cmd_buffer->state.dirty |= pipeline->dynamic_state_mask;
+      anv_dynamic_state_copy(&cmd_buffer->state.dynamic,
+                             &pipeline->dynamic_state,
+                             pipeline->dynamic_state_mask);
+      break;
+
+   default:
+      assert(!"invalid bind point");
+      break;
+   }
+}
+
+void anv_CmdSetViewport(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    firstViewport,
+    uint32_t                                    viewportCount,
+    const VkViewport*                           pViewports)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   const uint32_t total_count = firstViewport + viewportCount;
+   if (cmd_buffer->state.dynamic.viewport.count < total_count)
+      cmd_buffer->state.dynamic.viewport.count = total_count;
+
+   memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport,
+          pViewports, viewportCount * sizeof(*pViewports));
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
+}
+
+void anv_CmdSetScissor(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    firstScissor,
+    uint32_t                                    scissorCount,
+    const VkRect2D*                             pScissors)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   const uint32_t total_count = firstScissor + scissorCount;
+   if (cmd_buffer->state.dynamic.scissor.count < total_count)
+      cmd_buffer->state.dynamic.scissor.count = total_count;
+
+   memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor,
+          pScissors, scissorCount * sizeof(*pScissors));
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
+}
+
+void anv_CmdSetLineWidth(
+    VkCommandBuffer                             commandBuffer,
+    float                                       lineWidth)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->state.dynamic.line_width = lineWidth;
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
+}
+
+void anv_CmdSetDepthBias(
+    VkCommandBuffer                             commandBuffer,
+    float                                       depthBiasConstantFactor,
+    float                                       depthBiasClamp,
+    float                                       depthBiasSlopeFactor)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor;
+   cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp;
+   cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+}
+
+void anv_CmdSetBlendConstants(
+    VkCommandBuffer                             commandBuffer,
+    const float                                 blendConstants[4])
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   memcpy(cmd_buffer->state.dynamic.blend_constants,
+          blendConstants, sizeof(float) * 4);
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+}
+
+void anv_CmdSetDepthBounds(
+    VkCommandBuffer                             commandBuffer,
+    float                                       minDepthBounds,
+    float                                       maxDepthBounds)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
+   cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+}
+
+void anv_CmdSetStencilCompareMask(
+    VkCommandBuffer                             commandBuffer,
+    VkStencilFaceFlags                          faceMask,
+    uint32_t                                    compareMask)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask;
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+}
+
+void anv_CmdSetStencilWriteMask(
+    VkCommandBuffer                             commandBuffer,
+    VkStencilFaceFlags                          faceMask,
+    uint32_t                                    writeMask)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask;
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+}
+
+void anv_CmdSetStencilReference(
+    VkCommandBuffer                             commandBuffer,
+    VkStencilFaceFlags                          faceMask,
+    uint32_t                                    reference)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmd_buffer->state.dynamic.stencil_reference.front = reference;
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmd_buffer->state.dynamic.stencil_reference.back = reference;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+}
+
+void anv_CmdBindDescriptorSets(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineBindPoint                         pipelineBindPoint,
+    VkPipelineLayout                            _layout,
+    uint32_t                                    firstSet,
+    uint32_t                                    descriptorSetCount,
+    const VkDescriptorSet*                      pDescriptorSets,
+    uint32_t                                    dynamicOffsetCount,
+    const uint32_t*                             pDynamicOffsets)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer,
cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + descriptorSetCount < MAX_SETS); + + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->dynamic_offset_count > 0) { + anv_foreach_stage(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; + + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; + + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + uint32_t range = 0; + if (desc->buffer_view) + range = desc->buffer_view->range; + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = range; + desc++; + d++; + } + } + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + } + } +} + +void anv_CmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[firstBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); + } +} + +static void +add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, + struct anv_state state, struct anv_bo *bo, uint32_t offset) +{ + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; + + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, + state.offset + dword * 4, bo, offset); +} + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); + + default: + unreachable("Invalid descriptor type"); + } +} + +VkResult +anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_bind_map *map; + uint32_t color_count, bias, state_offset; + + switch (stage) { + case MESA_SHADER_FRAGMENT: + map = &cmd_buffer->state.pipeline->bindings[stage]; + bias = MAX_RTS; + color_count = subpass->color_count; + break; + case MESA_SHADER_COMPUTE: + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + bias = 1; + color_count = 0; + break; + default: + map = &cmd_buffer->state.pipeline->bindings[stage]; + bias = 0; + color_count = 0; + break; + } + + if (color_count + map->surface_count == 0) { + *bt_state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + map->surface_count, + &state_offset); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t a = 0; a < color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + assert(iview->color_rt_surface_state.alloc_size); + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + + if (stage == MESA_SHADER_COMPUTE && + cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; + uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; + + struct anv_state surface_state; + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + const struct anv_format *format = + anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, + format->isl_format, bo_offset, 12, 1); + + bt_map[0] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + + if (map->surface_count == 0) + goto out; + + if (map->image_count > 0) { + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants_dirty |= 1 << stage; + } + + uint32_t image = 0; + for (uint32_t s = 0; s < map->surface_count; s++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + struct anv_state surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ + continue; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case 
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + surface_state = desc->image_view->sampler_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + surface_state = desc->image_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, + image_param); + image_param->surface_idx = bias + s; + break; + } + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + surface_state = desc->buffer_view->surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + surface_state = desc->buffer_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, + image_param); + image_param->surface_idx = bias + s; + break; + + default: + assert(!"Invalid descriptor type"); + continue; + } + + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + assert(image == map->image_count); + + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, struct anv_state *state) +{ + struct anv_pipeline_bind_map *map; + + if (stage == MESA_SHADER_COMPUTE) + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + else + map = &cmd_buffer->state.pipeline->bindings[stage]; + + if (map->sampler_count == 0) { + *state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + uint32_t size = map->sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t s = 0; s < map->sampler_count; s++) { + struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; + + struct anv_sampler *sampler = desc->sampler; + + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. 
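+       *
+       * A zero-filled descriptor therefore reads back as a
+       * VK_DESCRIPTOR_TYPE_SAMPLER (== 0) with a NULL sampler pointer,
+       * which is exactly what the NULL check below catches; the memcpy
+       * then copies what is presumably the packed 16-byte (four dword)
+       * SAMPLER_STATE, matching the s * 16 stride of the table.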
+ */ + if (sampler == NULL) + continue; + + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); + + return state; +} + +struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +/** + * @brief Setup the command buffer for recording commands inside the given + * subpass. + * + * This does not record all commands needed for starting the subpass. + * Starting the subpass may require additional commands. + * + * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer + * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the + * command buffer for recording commands for some subpass. But only the first + * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. + */ +void +anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 8: + gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 9: + gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + default: + unreachable("unsupported gen\n"); + } +} + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; + struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->prog_data[stage]; + + /* If we don't actually have any push constants, bail. 
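+    * (Each prog_data->param[i] holds a byte offset into struct
+    * anv_push_constants rather than a real pointer, so the loop below
+    * turns every param into one dword fetched from *data; a hypothetical
+    * client constant at byte 16 would be read as
+    *
+    *    offset = offsetof(struct anv_push_constants, client_data) + 16;
+    *    u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
+    *
+    * exactly as the copy loop does.)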
*/ + if (data == NULL || prog_data->nr_params == 0) + return (struct anv_state) { .offset = 0 }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + prog_data->nr_params * sizeof(float), + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + const unsigned push_constant_data_size = + (local_id_dwords + prog_data->nr_params) * 4; + const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + const unsigned param_aligned_count = + reg_aligned_constant_size / sizeof(uint32_t); + + /* If we don't actually have any push constants, bail. */ + if (reg_aligned_constant_size == 0) + return (struct anv_state) { .offset = 0 }; + + const unsigned threads = pipeline->cs_thread_width_max; + const unsigned total_push_constants_size = + reg_aligned_constant_size * threads; + const unsigned push_constant_alignment = + cmd_buffer->device->info.gen < 8 ? 32 : 64; + const unsigned aligned_total_push_constants_size = + ALIGN(total_push_constants_size, push_constant_alignment); + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + aligned_total_push_constants_size, + push_constant_alignment); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, + reg_aligned_constant_size); + + /* Setup uniform data for the first thread */ + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + } + + /* Copy uniform data from the first thread to every other thread */ + const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); + for (unsigned t = 1; t < threads; t++) { + memcpy(&u32_map[t * param_aligned_count + local_id_dwords], + &u32_map[local_id_dwords], + uniform_data_size); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +void anv_CmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* pValues) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_foreach_stage(stage, stageFlags) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); + + memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); + } + + cmd_buffer->state.push_constants_dirty |= stageFlags; +} + +void anv_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); + + 
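+   /* Each secondary was classified at vkEndCommandBuffer time (see
+    * anv_cmd_buffer_end_batch_buffer above), so splicing one in is just a
+    * dispatch on its precomputed exec_mode.
+    */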
assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } +} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void anv_DestroyCommandPool( + VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + anv_ResetCommandPool(_device, commandPool, 0); + + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + /* FIXME: vkResetCommandPool must not destroy its command buffers. The + * Vulkan 1.0 spec requires that it only reset them: + * + * Resetting a command pool recycles all of the resources from all of + * the command buffers allocated from the command pool back to the + * command pool. All command buffers that have been allocated from the + * command pool are put in the initial state. + */ + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_destroy(cmd_buffer); + } + + return VK_SUCCESS; +} + +/** + * Return NULL if the current subpass has no depthstencil attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + return NULL; + + const struct anv_image_view *iview = + fb->attachments[subpass->depth_stencil_attachment]; + + assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + return iview; +} diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c new file mode 100644 index 00000000000..7a77336602a --- /dev/null +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -0,0 +1,532 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/* + * Descriptor set layouts. + */ + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t max_binding = 0; + uint32_t immutable_sampler_count = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + if (pCreateInfo->pBindings[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; + } + + size_t size = sizeof(struct anv_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct anv_sampler *); + + set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* We just allocate all the samplers at the end of the struct */ + struct anv_sampler **samplers = + (struct anv_sampler **)&set_layout->binding[max_binding + 1]; + + set_layout->binding_count = max_binding + 1; + set_layout->shader_stages = 0; + set_layout->size = 0; + + for (uint32_t b = 0; b <= max_binding; b++) { + /* Initialize all binding_layout entries to -1 */ + memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); + + set_layout->binding[b].immutable_samplers = NULL; + } + + /* Initialize all samplers to 0 */ + memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); + + uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t buffer_count = 0; + uint32_t dynamic_offset_count = 0; + + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; + uint32_t b = binding->binding; + + assert(binding->descriptorCount > 0); + set_layout->binding[b].array_size = binding->descriptorCount; + set_layout->binding[b].descriptor_index = set_layout->size; + set_layout->size += binding->descriptorCount; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; + sampler_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].buffer_index = buffer_count; + buffer_count += binding->descriptorCount; + /* 
fall through */ + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].surface_index = surface_count[s]; + surface_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; + dynamic_offset_count += binding->descriptorCount; + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].image_index = image_count[s]; + image_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + if (binding->pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += binding->descriptorCount; + + for (uint32_t i = 0; i < binding->descriptorCount; i++) + set_layout->binding[b].immutable_samplers[i] = + anv_sampler_from_handle(binding->pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; + } + + set_layout->shader_stages |= binding->stageFlags; + } + + set_layout->buffer_count = buffer_count; + set_layout->dynamic_offset_count = dynamic_offset_count; + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_free2(&device->alloc, pAllocator, set_layout); +} + +/* + * Pipeline layouts. These have nothing to do with the pipeline. 
+ * They are just multiple descriptor set layouts pasted together.
+ */
+
+VkResult anv_CreatePipelineLayout(
+    VkDevice                                    _device,
+    const VkPipelineLayoutCreateInfo*           pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkPipelineLayout*                           pPipelineLayout)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_pipeline_layout *layout;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
+
+   layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (layout == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   layout->num_sets = pCreateInfo->setLayoutCount;
+
+   unsigned dynamic_offset_count = 0;
+
+   memset(layout->stage, 0, sizeof(layout->stage));
+   for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
+      ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout,
+                      pCreateInfo->pSetLayouts[set]);
+      layout->set[set].layout = set_layout;
+
+      layout->set[set].dynamic_offset_start = dynamic_offset_count;
+      for (uint32_t b = 0; b < set_layout->binding_count; b++) {
+         if (set_layout->binding[b].dynamic_offset_index < 0)
+            continue;
+
+         dynamic_offset_count += set_layout->binding[b].array_size;
+         for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) {
+            if (set_layout->binding[b].stage[s].surface_index >= 0)
+               layout->stage[s].has_dynamic_offsets = true;
+         }
+      }
+   }
+
+   *pPipelineLayout = anv_pipeline_layout_to_handle(layout);
+
+   return VK_SUCCESS;
+}
+
+void anv_DestroyPipelineLayout(
+    VkDevice                                    _device,
+    VkPipelineLayout                            _pipelineLayout,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout);
+
+   anv_free2(&device->alloc, pAllocator, pipeline_layout);
+}
+
+/*
+ * Descriptor pools.  These are a no-op for now.
+ */
+
+VkResult anv_CreateDescriptorPool(
+    VkDevice                                    device,
+    const VkDescriptorPoolCreateInfo*           pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkDescriptorPool*                           pDescriptorPool)
+{
+   anv_finishme("VkDescriptorPool is a stub");
+   *pDescriptorPool = (VkDescriptorPool)1;
+   return VK_SUCCESS;
+}
+
+void anv_DestroyDescriptorPool(
+    VkDevice                                    _device,
+    VkDescriptorPool                            _pool,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets");
+}
+
+VkResult anv_ResetDescriptorPool(
+    VkDevice                                    device,
+    VkDescriptorPool                            descriptorPool,
+    VkDescriptorPoolResetFlags                  flags)
+{
+   anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets");
+   return VK_SUCCESS;
+}
+
+VkResult
+anv_descriptor_set_create(struct anv_device *device,
+                          const struct anv_descriptor_set_layout *layout,
+                          struct anv_descriptor_set **out_set)
+{
+   struct anv_descriptor_set *set;
+   size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]);
+
+   set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!set)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* A descriptor set may not be 100% filled.  Clear the set so we can
+    * later detect holes in it.
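+    *
+    * An application may, for instance, allocate a set whose layout has an
+    * array of eight sampled images and only ever write elements 0-3.
+    * Because VK_DESCRIPTOR_TYPE_SAMPLER is 0, the zeroed remainder reads
+    * back as sampler descriptors with NULL pointers, which the binding
+    * table and sampler emission paths treat as unwritten slots.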
+ */ + memset(set, 0, size); + + set->layout = layout; + + /* Go through and fill out immutable samplers if we have any */ + struct anv_descriptor *desc = set->descriptors; + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) + desc[i].sampler = layout->binding[b].immutable_samplers[i]; + } + desc += layout->binding[b].array_size; + } + + /* XXX: Use the pool */ + set->buffer_views = + anv_alloc(&device->alloc, + sizeof(set->buffer_views[0]) * layout->buffer_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set->buffer_views) { + anv_free(&device->alloc, set); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t b = 0; b < layout->buffer_count; b++) { + set->buffer_views[b].surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + set->buffer_count = layout->buffer_count; + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + /* XXX: Use the pool */ + for (uint32_t b = 0; b < set->buffer_count; b++) + anv_state_pool_free(&device->surface_state_pool, + set->buffer_views[b].surface_state); + + anv_free(&device->alloc, set->buffer_views); + anv_free(&device->alloc, set); +} + +VkResult anv_AllocateDescriptorSets( + VkDevice _device, + const VkDescriptorSetAllocateInfo* pAllocateInfo, + VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result = VK_SUCCESS; + struct anv_descriptor_set *set; + uint32_t i; + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) + anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, + i, pDescriptorSets); + + return result; +} + +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + +void anv_UpdateDescriptorSets( + VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); + const struct anv_descriptor_set_binding_layout *bind_layout = + &set->layout->binding[write->dstBinding]; + struct anv_descriptor *desc = + &set->descriptors[bind_layout->descriptor_index]; + desc += write->dstArrayElement; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = sampler, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; 
j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + desc[j].image_view = iview; + + /* If this descriptor has an immutable sampler, we don't want + * to stomp on it. + */ + if (sampler) + desc[j].sampler = sampler; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .image_view = iview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = bview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + assert(write->pBufferInfo[j].buffer); + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); + assert(buffer); + + struct anv_buffer_view *view = + &set->buffer_views[bind_layout->buffer_index]; + view += write->dstArrayElement + j; + + const struct anv_format *format = + anv_format_for_descriptor_type(write->descriptorType); + + view->format = format->isl_format; + view->bo = buffer->bo; + view->offset = buffer->offset + write->pBufferInfo[j].offset; + + /* For buffers with dynamic offsets, we use the full possible + * range in the surface state and do the actual range-checking + * in the shader. 
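+ * (The clamp against the range that is actually bound is then emitted + * by the dynamic-offset lowering pass that runs over the shader NIR.)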
+ */ + if (bind_layout->dynamic_offset_index >= 0 || + write->pBufferInfo[j].range == VK_WHOLE_SIZE) + view->range = buffer->size - write->pBufferInfo[j].offset; + else + view->range = write->pBufferInfo[j].range; + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, 1); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = view, + }; + } + break; + + default: + break; + } + } + + for (uint32_t i = 0; i < descriptorCopyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet); + ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); + + const struct anv_descriptor_set_binding_layout *src_layout = + &src->layout->binding[copy->srcBinding]; + struct anv_descriptor *src_desc = + &src->descriptors[src_layout->descriptor_index]; + src_desc += copy->srcArrayElement; + + const struct anv_descriptor_set_binding_layout *dst_layout = + &dst->layout->binding[copy->dstBinding]; + struct anv_descriptor *dst_desc = + &dst->descriptors[dst_layout->descriptor_index]; + dst_desc += copy->dstArrayElement; + + for (uint32_t j = 0; j < copy->descriptorCount; j++) + dst_desc[j] = src_desc[j]; + } +} diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c new file mode 100644 index 00000000000..a8835f74179 --- /dev/null +++ b/src/intel/vulkan/anv_device.c @@ -0,0 +1,1789 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" +#include "mesa/main/git_sha1.h" +#include "util/strtod.h" +#include "util/debug.h" + +#include "genxml/gen7_pack.h" + +struct anv_dispatch_table dtable; + +static void +compiler_debug_log(void *data, const char *fmt, ...) +{ } + +static void +compiler_perf_log(void *data, const char *fmt, ...)
+{ + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + vfprintf(stderr, fmt, args); + + va_end(args); +} + +static VkResult +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + VkResult result; + int fd; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to open %s: %m", path); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = instance; + device->path = path; + + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + if (!device->chipset_id) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get chipset id: %m"); + goto fail; + } + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id); + if (!device->info) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get device info"); + goto fail; + } + + if (device->info->is_haswell) { + fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && !device->info->is_baytrail) { + fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && device->info->is_baytrail) { + fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); + } else if (device->info->gen >= 8) { + /* Broadwell, Cherryview, Skylake, Broxton and Kabylake are as fully + * supported as anything */ + } else { + result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Vulkan not yet supported on %s", device->name); + goto fail; + } + + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get aperture size: %m"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing gem wait"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing execbuf2"); + goto fail; + } + + if (!device->info->has_llc && + anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing wc mmap"); + goto fail; + } + + bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); + + close(fd); + + brw_process_intel_debug_variable(); + + device->compiler = brw_compiler_create(NULL, device->info); + if (device->compiler == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + device->compiler->shader_debug_log = compiler_debug_log; + device->compiler->shader_perf_log = compiler_perf_log; + + /* XXX: Actually detect bit6 swizzling */ + isl_device_init(&device->isl_dev, device->info, swizzled); + + return VK_SUCCESS; + +fail: + close(fd); + return result; +} + +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + ralloc_free(device->compiler); +} + +static const VkExtensionProperties global_extensions[] = { + { + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = 25, + }, + { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = 5, + }, +#ifdef HAVE_WAYLAND_PLATFORM + { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = 4, + }, +#endif +}; + +static const VkExtensionProperties device_extensions[] = { + { + .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 67, + }, +}; + +static void *
+default_alloc_func(void *pUserData, size_t size, size_t align, + VkSystemAllocationScope allocationScope) +{ + return malloc(size); +} + +static void * +default_realloc_func(void *pUserData, void *pOriginal, size_t size, + size_t align, VkSystemAllocationScope allocationScope) +{ + return realloc(pOriginal, size); +} + +static void +default_free_func(void *pUserData, void *pMemory) +{ + free(pMemory); +} + +static const VkAllocationCallbacks default_alloc = { + .pUserData = NULL, + .pfnAllocation = default_alloc_func, + .pfnReallocation = default_realloc_func, + .pfnFree = default_free_func, +}; + +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + struct anv_instance *instance; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + uint32_t client_version = pCreateInfo->pApplicationInfo ? + pCreateInfo->pApplicationInfo->apiVersion : + VK_MAKE_VERSION(1, 0, 0); + if (VK_MAKE_VERSION(1, 0, 0) > client_version || + client_version > VK_MAKE_VERSION(1, 0, 3)) { + return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Client requested version %d.%d.%d", + VK_VERSION_MAJOR(client_version), + VK_VERSION_MINOR(client_version), + VK_VERSION_PATCH(client_version)); + } + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + global_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + + if (pAllocator) + instance->alloc = *pAllocator; + else + instance->alloc = default_alloc; + + instance->apiVersion = client_version; + instance->physicalDeviceCount = -1; + + _mesa_locale_init(); + + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + + anv_init_wsi(instance); + + *pInstance = anv_instance_to_handle(instance); + + return VK_SUCCESS; +} + +void anv_DestroyInstance( + VkInstance _instance, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + if (instance->physicalDeviceCount > 0) { + /* We support at most one physical device. */ + assert(instance->physicalDeviceCount == 1); + anv_physical_device_finish(&instance->physicalDevice); + } + + anv_finish_wsi(instance); + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + _mesa_locale_fini(); + + anv_free(&instance->alloc, instance); +} + +VkResult anv_EnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + VkResult result; + + if (instance->physicalDeviceCount < 0) { + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { + instance->physicalDeviceCount = 0; + } else if (result == VK_SUCCESS) { + instance->physicalDeviceCount = 1; + } else { + return result; + } + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. 
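+ * The usual pattern is to call this once with pPhysicalDevices == NULL to + * learn the count and then a second time with an array of that size.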
+ * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is a pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. + */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = true, + .fullDrawIndexUint32 = true, + .imageCubeArray = false, + .independentBlend = pdevice->info->gen >= 8, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSrcBlend = true, + .logicOp = true, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = true, + .samplerAnisotropy = false, /* FINISHME */ + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = true, + .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = true, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; +} + +void +anv_device_get_cache_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); + snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4); +} + +void anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + const struct brw_device_info *devinfo = pdevice->info; + + anv_finishme("Get correct values for VkPhysicalDeviceLimits"); + + const float time_stamp_base = devinfo->gen >= 9 ?
83.333 : 80.0; + + VkSampleCountFlags sample_counts = + isl_device_get_sample_counts(&pdevice->isl_dev); + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0, + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageResources = 128, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetUniformBuffersDynamic = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetStorageBuffersDynamic = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxDescriptorSetInputAttachments = 256, + .maxVertexInputAttributes = 32, + .maxVertexInputBindings = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 2, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -16384.0, 16384.0 }, + .viewportSubPixelBits = 13, /* We take a float? 
*/ + .minMemoryMapAlignment = 4096, /* A page */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, + .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .discreteQueuePriorities = 1, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + }; + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = VK_MAKE_VERSION(1, 0, 2), + .driverVersion = 1, + .vendorID = 0x8086, + .deviceID = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ + }; + + strcpy(pProperties->deviceName, pdevice->name); + anv_device_get_cache_uuid(pProperties->pipelineCacheUUID); +} + +void anv_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkQueueFamilyProperties* pQueueFamilyProperties) +{ + if (pQueueFamilyProperties == NULL) { + *pCount = 1; + return; + } + + assert(*pCount >= 1); + + *pQueueFamilyProperties = (VkQueueFamilyProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 36, /* XXX: Real value here */ + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + }; +} + +void anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkDeviceSize heap_size; + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. + */ + heap_size = 3 * physical_device->aperture_size / 4; + + if (physical_device->info->has_llc) { + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. 
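+ * Without a shared LLC (the else branch below) CPU caches are not + * coherent with the GPU, which is why the !has_llc paths in this file + * have to clflush explicitly.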
+ */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } else { + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). + */ + pMemoryProperties->memoryTypeCount = 2; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + pMemoryProperties->memoryTypes[1] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + }; +} + +PFN_vkVoidFunction anv_GetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +/* The loader wants us to expose a second GetInstanceProcAddr function + * to work around certain LD_PRELOAD issues seen in apps. + */ +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_GetInstanceProcAddr(instance, pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +static VkResult +anv_queue_init(struct anv_device *device, struct anv_queue *queue) +{ + queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + queue->device = device; + queue->pool = &device->surface_state_pool; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +} + +static struct anv_state +anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) +{ + struct anv_state state; + + state = anv_state_pool_alloc(pool, size, align); + memcpy(state.map, p, size); + + if (!pool->block_pool->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct gen8_border_color { + union { + float float32[4]; + uint32_t uint32[4]; + }; + /* Pad out to 64 bytes */ + uint32_t _pad[12]; +}; + +static void +anv_device_init_border_colors(struct anv_device *device) +{ + static const struct gen8_border_color border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, + }; + + device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, + sizeof(border_colors), 64, + border_colors); +} + +VkResult +anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch) +{ + struct drm_i915_gem_execbuffer2 
execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo bo; + VkResult result = VK_SUCCESS; + uint32_t size; + int64_t timeout; + int ret; + + /* Kernel driver requires 8 byte aligned batch length */ + size = align_u32(batch->next - batch->start, 8); + assert(size < device->batch_bo_pool.bo_size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); + if (result != VK_SUCCESS) + return result; + + memcpy(bo.map, batch->start, size); + if (!device->info.has_llc) + anv_clflush_range(bo.map, size); + + exec2_objects[0].handle = bo.gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo.offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo.gem_handle, &timeout); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + fail: + anv_bo_pool_free(&device->batch_bo_pool, &bo); + + return result; +} + +VkResult anv_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkResult result; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + device_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + anv_set_dispatch_devinfo(physical_device->info); + + device = anv_alloc2(&physical_device->instance->alloc, pAllocator, + sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = physical_device->instance; + device->chipset_id = physical_device->chipset_id; + + if (pAllocator) + device->alloc = *pAllocator; + else + device->alloc = physical_device->instance->alloc; + + /* XXX(chadv): Can we dup() physicalDevice->fd here? 
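+ * Not as the code stands: anv_physical_device_init() closes its fd + * before returning, so there is no long-lived fd left to dup.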
*/ + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); + if (device->fd == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_device; + } + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + device->info = *physical_device->info; + device->isl_dev = physical_device->isl_dev; + + pthread_mutex_init(&device->mutex, NULL); + + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + + anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); + + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); + anv_pipeline_cache_init(&device->default_pipeline_cache, device); + + anv_block_pool_init(&device->surface_state_block_pool, device, 4096); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + anv_bo_init_new(&device->workaround_bo, device, 1024); + + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + + anv_queue_init(device, &device->queue); + + switch (device->info.gen) { + case 7: + if (!device->info.is_haswell) + result = gen7_init_device_state(device); + else + result = gen75_init_device_state(device); + break; + case 8: + result = gen8_init_device_state(device); + break; + case 9: + result = gen9_init_device_state(device); + break; + default: + /* Shouldn't get here as we don't create physical devices for any other + * gens. */ + unreachable("unhandled gen"); + } + if (result != VK_SUCCESS) + goto fail_fd; + + result = anv_device_init_meta(device); + if (result != VK_SUCCESS) + goto fail_fd; + + anv_device_init_border_colors(device); + + *pDevice = anv_device_to_handle(device); + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_free(&device->alloc, device); + + return result; +} + +void anv_DestroyDevice( + VkDevice _device, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_queue_finish(&device->queue); + + anv_device_finish_meta(device); + +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. 
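+ * (The state pool is registered with valgrind as a mempool, so every + * allocation from it wants a matching free.)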
+ */ + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); +#endif + + anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); + anv_gem_close(device, device->workaround_bo.gem_handle); + + anv_bo_pool_finish(&device->batch_bo_pool); + anv_state_pool_finish(&device->dynamic_state_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->surface_state_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + anv_block_pool_finish(&device->scratch_block_pool); + + close(device->fd); + + pthread_mutex_destroy(&device->mutex); + + anv_free(&device->alloc, device); +} + +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); + + *pPropertyCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(device_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); + + *pPropertyCount = ARRAY_SIZE(device_extensions); + memcpy(pProperties, device_extensions, sizeof(device_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateInstanceLayerProperties( + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +VkResult anv_EnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +void anv_GetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(queueIndex == 0); + + *pQueue = anv_queue_to_handle(&device->queue); +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + int ret; + + for (uint32_t i = 0; i < submitCount; i++) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + + for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) + cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; + } + } + + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) { + /* We don't know the real error. 
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + } + + return VK_SUCCESS; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); +} + +VkResult anv_DeviceWaitIdle( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_batch batch; + + uint32_t cmds[8]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + return anv_device_submit_simple_batch(device, &batch); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + bo->is_winsys_bo = false; + + return VK_SUCCESS; +} + +VkResult anv_AllocateMemory( + VkDevice _device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + + /* We support exactly one memory heap. */ + assert(pAllocateInfo->memoryTypeIndex == 0 || + (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); + + /* FINISHME: Fail if allocation request exceeds heap size. */ + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* The kernel is going to give us whole pages anyway */ + uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + + result = anv_bo_init_new(&mem->bo, device, alloc_size); + if (result != VK_SUCCESS) + goto fail; + + mem->type_index = pAllocateInfo->memoryTypeIndex; + + *pMem = anv_device_memory_to_handle(mem); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, mem); + + return result; +} + +void anv_FreeMemory( + VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + if (mem == NULL) + return; + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_free2(&device->alloc, pAllocator, mem); +} + +VkResult anv_MapMemory( + VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (size == VK_WHOLE_SIZE) + size = mem->bo.size - offset; + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. 
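+ * (Hence the page-rounded, per-call mmap below rather than one + * persistent whole-BO map.)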
*/ + + uint32_t gem_flags = 0; + if (!device->info.has_llc && mem->type_index == 0) + gem_flags |= I915_MMAP_WC; + + /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ + uint64_t map_offset = offset & ~4095ull; + assert(offset >= map_offset); + uint64_t map_size = (offset + size) - map_offset; + + /* Let's map whole pages */ + map_size = align_u64(map_size, 4096); + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, + map_offset, map_size, gem_flags); + mem->map_size = map_size; + + *ppData = mem->map + (offset - map_offset); + + return VK_SUCCESS; +} + +void anv_UnmapMemory( + VkDevice _device, + VkDeviceMemory _memory) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) + return; + + anv_gem_munmap(mem->map, mem->map_size); +} + +static void +clflush_mapped_ranges(struct anv_device *device, + uint32_t count, + const VkMappedMemoryRange *ranges) +{ + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); + void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); + void *end; + + if (ranges[i].offset + ranges[i].size > mem->map_size) + end = mem->map + mem->map_size; + else + end = mem->map + ranges[i].offset + ranges[i].size; + + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } +} + +VkResult anv_FlushMappedMemoryRanges( + VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + return VK_SUCCESS; +} + +VkResult anv_InvalidateMappedMemoryRanges( + VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + if (device->info.has_llc) + return VK_SUCCESS; + + clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); + + /* Make sure no reads get moved up above the invalidate. */ + __builtin_ia32_mfence(); + + return VK_SUCCESS; +} + +void anv_GetBufferMemoryRequirements( + VkDevice device, + VkBuffer _buffer, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly one memory type. + */ + pMemoryRequirements->memoryTypeBits = 1; + + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; +} + +void anv_GetImageMemoryRequirements( + VkDevice device, + VkImage _image, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource.
The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly one memory type. + */ + pMemoryRequirements->memoryTypeBits = 1; + + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; +} + +void anv_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +{ + stub(); +} + +void anv_GetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; +} + +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + if (mem) { + buffer->bo = &mem->bo; + buffer->offset = memoryOffset; + } else { + buffer->bo = NULL; + buffer->offset = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_BindImageMemory( + VkDevice device, + VkImage _image, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + ANV_FROM_HANDLE(anv_image, image, _image); + + if (mem) { + image->bo = &mem->bo; + image->offset = memoryOffset; + } else { + image->bo = NULL; + image->offset = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_QueueBindSparse( + VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence) +{ + stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); +} + +VkResult anv_CreateFence( + VkDevice _device, + const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFence* pFence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + const uint32_t fence_size = 128; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (fence == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&fence->bo, device, fence_size); + if (result != VK_SUCCESS) + goto fail; + + fence->bo.map = + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); + batch.next = batch.start = fence->bo.map; + batch.end = fence->bo.map + fence->bo.size; + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + if (!device->info.has_llc) { + assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); + assert(batch.next - fence->bo.map <= CACHELINE_SIZE); + __builtin_ia32_mfence(); + __builtin_ia32_clflush(fence->bo.map); + } + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = 0; + fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + fence->ready = false; + + *pFence = anv_fence_to_handle(fence); + + return VK_SUCCESS; + + fail: +
anv_free2(&device->alloc, pAllocator, fence); + + return result; +} + +void anv_DestroyFence( + VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_free2(&device->alloc, pAllocator, fence); +} + +VkResult anv_ResetFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences) +{ + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->ready = false; + } + + return VK_SUCCESS; +} + +VkResult anv_GetFenceStatus( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + int64_t t = 0; + int ret; + + if (fence->ready) + return VK_SUCCESS; + + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->ready = true; + return VK_SUCCESS; + } + + return VK_NOT_READY; +} + +VkResult anv_WaitForFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed + * to block indefinitely for timeouts <= 0. Unfortunately, this was broken + * for a couple of kernel releases. Since there's no way to know + * whether or not the kernel we're using is one of the broken ones, the + * best we can do is to clamp the timeout to INT64_MAX. This limits the + * maximum timeout from 584 years to 292 years - likely not a big deal. + */ + if (timeout > INT64_MAX) + timeout = INT64_MAX; + + int64_t t = timeout; + + /* FIXME: handle !waitAll */ + + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == -1 && errno == ETIME) { + return VK_TIMEOUT; + } else if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem wait failed: %m"); + } + } + + return VK_SUCCESS; +} + +// Queue semaphore functions + +VkResult anv_CreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSemaphore* pSemaphore) +{ + /* The DRM execbuffer ioctl always executes in-order, even between different + * rings. As such, there's nothing to do for the user space semaphore. + */ + + *pSemaphore = (VkSemaphore)1; + + return VK_SUCCESS; +} + +void anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator) +{ +} + +// Event functions + +VkResult anv_CreateEvent( + VkDevice _device, + const VkEventCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkEvent* pEvent) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_event *event; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); + + state = anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(*event), 8); + event = state.map; + event->state = state; + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed.
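+ * The mfence orders the preceding stores ahead of the clflush, and the + * clflush then writes the event's cache line back where the GPU can + * see it.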
*/ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + *pEvent = anv_event_to_handle(event); + + return VK_SUCCESS; +} + +void anv_DestroyEvent( + VkDevice _device, + VkEvent _event, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_state_pool_free(&device->dynamic_state_pool, event->state); +} + +VkResult anv_GetEventStatus( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + if (!device->info.has_llc) { + /* Invalidate read cache before reading event written by GPU. */ + __builtin_ia32_clflush(event); + __builtin_ia32_mfence(); + + } + + return event->semaphore; +} + +VkResult anv_SetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_SET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +VkResult anv_ResetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +// Buffer functions + +VkResult anv_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBuffer* pBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + buffer->bo = NULL; + buffer->offset = 0; + + *pBuffer = anv_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +void anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_free2(&device->alloc, pAllocator, buffer); +} + +void +anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_buffer_surface_state(state.map, format, offset, range, + stride); + else + gen7_fill_buffer_surface_state(state.map, format, offset, range, + stride); + break; + case 8: + gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + case 9: + gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +void anv_DestroySampler( + VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_free2(&device->alloc, pAllocator, sampler); +} + +VkResult anv_CreateFramebuffer( + VkDevice _device, + const 
VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFramebuffer* pFramebuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + + sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; + framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + framebuffer->attachments[i] = anv_image_view_from_handle(_iview); + } + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + *pFramebuffer = anv_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; +} + +void anv_DestroyFramebuffer( + VkDevice _device, + VkFramebuffer _fb, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + + anv_free2(&device->alloc, pAllocator, fb); +} + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) +{ +} + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) +{ +} diff --git a/src/intel/vulkan/anv_dump.c b/src/intel/vulkan/anv_dump.c new file mode 100644 index 00000000000..b7fa28be787 --- /dev/null +++ b/src/intel/vulkan/anv_dump.c @@ -0,0 +1,209 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +/* This file contains utility functions for help debugging. They can be + * called from GDB or similar to help inspect images and buffers. 
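+ * + * For example, from a GDB prompt: + * + * (gdb) call anv_dump_image_to_ppm(device, image, 0, 0, "dump.ppm")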
+ */ + +void +anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename) +{ + VkDevice vk_device = anv_device_to_handle(device); + VkResult result; + + VkExtent2D extent = { image->extent.width, image->extent.height }; + for (unsigned i = 0; i < miplevel; i++) { + extent.width = MAX2(1, extent.width / 2); + extent.height = MAX2(1, extent.height / 2); + } + + VkImage copy_image; + result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = (VkExtent3D) { extent.width, extent.height, 1 }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .flags = 0, + }, NULL, &copy_image); + assert(result == VK_SUCCESS); + + VkMemoryRequirements reqs; + anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); + + VkDeviceMemory memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = reqs.size, + .memoryTypeIndex = 0, + }, NULL, &memory); + assert(result == VK_SUCCESS); + + result = anv_BindImageMemory(vk_device, copy_image, memory, 0); + assert(result == VK_SUCCESS); + + VkCommandPool commandPool; + result = anv_CreateCommandPool(vk_device, + &(VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = 0, + .flags = 0, + }, NULL, &commandPool); + assert(result == VK_SUCCESS); + + VkCommandBuffer cmd; + result = anv_AllocateCommandBuffers(vk_device, + &(VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }, &cmd); + assert(result == VK_SUCCESS); + + result = anv_BeginCommandBuffer(cmd, + &(VkCommandBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }); + assert(result == VK_SUCCESS); + + anv_CmdBlitImage(cmd, + anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, + copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, + &(VkImageBlit) { + .srcSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = miplevel, + .baseArrayLayer = array_layer, + .layerCount = 1, + }, + .srcOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + .dstSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + }, VK_FILTER_NEAREST); + + ANV_CALL(CmdPipelineBarrier)(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + true, 0, NULL, 0, NULL, 1, + &(VkImageMemoryBarrier) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = 0, + .dstQueueFamilyIndex = 0, + .image = copy_image, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + + result = anv_EndCommandBuffer(cmd); + assert(result == VK_SUCCESS); + + VkFence fence; + result = anv_CreateFence(vk_device, + &(VkFenceCreateInfo) { +
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, NULL, &fence); + assert(result == VK_SUCCESS); + + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, + &(VkSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }, fence); + assert(result == VK_SUCCESS); + + result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); + assert(result == VK_SUCCESS); + + anv_DestroyFence(vk_device, fence, NULL); + anv_DestroyCommandPool(vk_device, commandPool, NULL); + + uint8_t *map; + result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); + assert(result == VK_SUCCESS); + + VkSubresourceLayout layout; + anv_GetImageSubresourceLayout(vk_device, copy_image, + &(VkImageSubresource) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .arrayLayer = 0, + }, &layout); + + map += layout.offset; + + /* Now we can finally write the PPM file */ + FILE *file = fopen(filename, "wb"); + assert(file); + + fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); + for (unsigned y = 0; y < extent.height; y++) { + uint8_t row[extent.width * 3]; + for (unsigned x = 0; x < extent.width; x++) { + row[x * 3 + 0] = map[x * 4 + 0]; + row[x * 3 + 1] = map[x * 4 + 1]; + row[x * 3 + 2] = map[x * 4 + 2]; + } + fwrite(row, 3, extent.width, file); + + map += layout.rowPitch; + } + fclose(file); + + anv_UnmapMemory(vk_device, memory); + anv_DestroyImage(vk_device, copy_image, NULL); + anv_FreeMemory(vk_device, memory, NULL); +} diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py new file mode 100644 index 00000000000..1e4cfcb1755 --- /dev/null +++ b/src/intel/vulkan/anv_entrypoints_gen.py @@ -0,0 +1,324 @@ +# coding=utf-8 +# +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regex. We hope that won't change. + +p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] + +# We generate a static hash table for entry point lookup +# (vkGetProcAddress). We use a linear congruential generator for our hash +# function and a power-of-two size table. The prime numbers are determined +# experimentally.
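+# +# As a rough illustration (a sketch mirroring the C lookup code generated +# at the bottom of this script, not code that runs here), resolving a name +# proceeds like: +# +#   h = hash(name)                      # 32-bit LCG over the name's bytes +#   while map[h & hash_mask] != none:   # stop at the first empty slot +#       e = entrypoints[map[h & hash_mask]] +#       if e.hash == hash(name):        # full-hash match; a final strcmp() +#           return e                    # guards against 32-bit collisions +#       h += prime_step                 # linear probe step on collision +#   return None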
+ +none = 0xffff +hash_size = 256 +u32_mask = 2**32 - 1 +hash_mask = hash_size - 1 + +prime_factor = 5024183 +prime_step = 19 + +def hash(name): + h = 0; + for c in name: + h = (h * prime_factor + ord(c)) & u32_mask + + return h + +opt_header = False +opt_code = False + +if (sys.argv[1] == "header"): + opt_header = True + sys.argv.pop() +elif (sys.argv[1] == "code"): + opt_code = True + sys.argv.pop() + +# Parse the entry points in the header + +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + if m.group(2) == 'VoidFunction': + continue + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 + +# For outputting entrypoints.h we generate an anv_EntryPoint() prototype +# per entry point. + +if opt_header: + print "/* This file generated from anv_entrypoints_gen.py, don't edit directly. */\n" + + print "struct anv_dispatch_table {" + print " union {" + print " void *entrypoints[%d];" % len(entrypoints) + print " struct {" + + for type, name, args, num, h in entrypoints: + print " %s (*%s)%s;" % (type, name, args) + print " };\n" + print " };\n" + print "};\n" + + print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" + + for type, name, args, num, h in entrypoints: + print "%s anv_%s%s;" % (type, name, args) + print "%s gen7_%s%s;" % (type, name, args) + print "%s gen75_%s%s;" % (type, name, args) + print "%s gen8_%s%s;" % (type, name, args) + print "%s gen9_%s%s;" % (type, name, args) + print "%s anv_validate_%s%s;" % (type, name, args) + exit() + + + +print """/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* DO NOT EDIT! This is a generated file. */ + +#include "anv_private.h" + +struct anv_entrypoint { + uint32_t name; + uint32_t hash; +}; + +/* We use a big string constant to avoid lots of relocations from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] =""" + +offsets = [] +i = 0; +for type, name, args, num, h in entrypoints: + print " \"vk%s\\0\"" % name + offsets.append(i) + i += 2 + len(name) + 1 +print """ ; + +/* Weak aliases for all potential validate functions. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick a validate wrapper if available or just plug in the actual + * entry point.
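 + * + * For a single entry point, the emitted pattern looks roughly like this + * (illustrative; the argument list is abbreviated): + * + *    VkResult validate_CreateDevice(...) __attribute__ ((weak)); + * + * An undefined weak symbol resolves to NULL, so the corresponding slot in + * validate_layer.entrypoints stays NULL unless a wrapper was actually written.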
+ */ +""" + +# Now generate the table of all entry points and their validation functions + +print "\nstatic const struct anv_entrypoint entrypoints[] = {" +for type, name, args, num, h in entrypoints: + print " { %5d, 0x%08x }," % (offsets[num], h) +print "};\n" + +for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: + for type, name, args, num, h in entrypoints: + print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) + print "\nconst struct anv_dispatch_table %s_layer = {" % layer + for type, name, args, num, h in entrypoints: + print " .%s = %s_%s," % (name, layer, name) + print "};\n" + +print """ +#ifdef DEBUG +static bool enable_validate = true; +#else +static bool enable_validate = false; +#endif + +/* We can't use symbols that need resolving (like, oh, getenv) in the resolve + * function. This means that we have to determine whether or not to use the + * validation layer sometime before that. The constructor function attribute asks + * the dynamic linker to invoke determine_validate() at dlopen() time which + * works. + */ +static void __attribute__ ((constructor)) +determine_validate(void) +{ + const char *s = getenv("ANV_VALIDATE"); + + if (s) + enable_validate = atoi(s); +} + +static const struct brw_device_info *dispatch_devinfo; + +void +anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) +{ + dispatch_devinfo = devinfo; +} + +void * __attribute__ ((noinline)) +anv_resolve_entrypoint(uint32_t index) +{ + if (enable_validate && validate_layer.entrypoints[index]) + return validate_layer.entrypoints[index]; + + if (dispatch_devinfo == NULL) { + assert(anv_layer.entrypoints[index]); + return anv_layer.entrypoints[index]; + } + + switch (dispatch_devinfo->gen) { + case 9: + if (gen9_layer.entrypoints[index]) + return gen9_layer.entrypoints[index]; + /* fall through */ + case 8: + if (gen8_layer.entrypoints[index]) + return gen8_layer.entrypoints[index]; + /* fall through */ + case 7: + if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) + return gen75_layer.entrypoints[index]; + + if (gen7_layer.entrypoints[index]) + return gen7_layer.entrypoints[index]; + /* fall through */ + case 0: + return anv_layer.entrypoints[index]; + default: + unreachable("unsupported gen\\n"); + } +} +""" + +# Now output ifuncs and their resolve helpers for all entry points. The +# resolve helper calls resolve_entrypoint() with the entry point index, which +# lets the resolver look it up in the table. + +for type, name, args, num, h in entrypoints: + print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) + print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + + +# Now generate the hash table used for entry point look up. This is a +# uint16_t table of entry point indices. We use 0xffff to indicate an entry +# in the hash table is empty. 
+ +map = [none for f in xrange(hash_size)] +collisions = [0 for f in xrange(10)] +for type, name, args, num, h in entrypoints: + level = 0 + while map[h & hash_mask] != none: + h = h + prime_step + level = level + 1 + if level > 9: + collisions[9] += 1 + else: + collisions[level] += 1 + map[h & hash_mask] = num + +print "/* Hash table stats:" +print " * size %d entries" % hash_size +print " * collisions entries" +for i in xrange(10): + if (i == 9): + plus = "+" + else: + plus = " " + + print " * %2d%s %4d" % (i, plus, collisions[i]) +print " */\n" + +print "#define none 0x%04x\n" % none + +print "static const uint16_t map[] = {" +for i in xrange(0, hash_size, 8): + print " ", + for j in xrange(i, i + 8): + if map[j] & 0xffff == 0xffff: + print " none,", + else: + print "0x%04x," % (map[j] & 0xffff), + print + +print "};" + +# Finally we generate the hash table lookup function. The hash function and +# linear probing algorithm matches the hash table generated above. + +print """ +void * +anv_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = %d; + static const uint32_t prime_step = %d; + const struct anv_entrypoint *e; + uint32_t hash, h, i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + do { + i = map[h & %d]; + if (i == none) + return NULL; + e = &entrypoints[i]; + h += prime_step; + } while (e->hash != hash); + + if (strcmp(name, strings + e->name) != 0) + return NULL; + + return anv_resolve_entrypoint(i); +} +""" % (prime_factor, prime_step, hash_mask) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c new file mode 100644 index 00000000000..7798a7bbde3 --- /dev/null +++ b/src/intel/vulkan/anv_formats.c @@ -0,0 +1,603 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" +#include "brw_surface_formats.h" + +#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) +#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) + +#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ + [__vk_fmt] = { \ + .vk_format = __vk_fmt, \ + .name = #__vk_fmt, \ + .isl_format = __hw_fmt, \ + .isl_layout = &isl_format_layouts[__hw_fmt], \ + .swizzle = __swizzle, \ + __VA_ARGS__ \ + } + +#define fmt(__vk_fmt, __hw_fmt, ...) \ + swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) + +/* HINT: For array formats, the ISL name should match the VK name. 
For + * packed formats, they should have the channels in reverse order from each + * other. The reason for this is that, for packed formats, the ISL (and + * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. + */ +static const struct anv_format anv_formats[] = { + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), + fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), + swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), + fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), + swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), + fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), + fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), + fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), + fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), + fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), + fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), + fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), + fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), + fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), + fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, 
ISL_FORMAT_R10G10B10A2_USCALED), + fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), + fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), + fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), + fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), + fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), + fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), + fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), + fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), + fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), + fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), + fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), + fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), + fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, 
.has_depth = true), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), + fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), + fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), + fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), + fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), + fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), + fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), + fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), + fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_BC4_UNORM), + fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), + fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), + fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), + fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), + fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), + fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), + fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), + fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), + fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), + fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), + fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), + fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + 
fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), + fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), +}; + +#undef fmt + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle) +{ + const struct anv_format *anv_fmt = &anv_formats[format]; + + if (swizzle) + *swizzle = anv_fmt->swizzle; + + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) { + return ISL_FORMAT_UNSUPPORTED; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL && + !util_is_power_of_two(anv_fmt->isl_layout->bs)) { + /* Tiled formats *must* be power-of-two because we need to upload + * them with the render pipeline. For 3-channel formats, we fix + * this by switching them over to RGBX or RGBA formats under the + * hood.
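 + * + * For example (format names assumed from the ISL format tables): an RGB + * format with a 12-byte texel gets widened to a 16-byte RGBX layout, + * + *    isl_format_rgb_to_rgbx(ISL_FORMAT_R32G32B32_FLOAT) + *       == ISL_FORMAT_R32G32B32X32_FLOAT + * + * whose power-of-two texel size satisfies the tiling requirement.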
+ */ + enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format); + if (rgbx != ISL_FORMAT_UNSUPPORTED) + return rgbx; + else + return isl_format_rgb_to_rgba(anv_fmt->isl_format); + } else { + return anv_fmt->isl_format; + } + + case VK_IMAGE_ASPECT_DEPTH_BIT: + case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): + assert(anv_fmt->has_depth); + return anv_fmt->isl_format; + + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(anv_fmt->has_stencil); + return ISL_FORMAT_R8_UINT; + + default: + unreachable("bad VkImageAspect"); + return ISL_FORMAT_UNSUPPORTED; + } +} + +// Format capabilities + +void anv_validate_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatProperties) +{ + const struct anv_format *format = anv_format_for_vk_format(_format); + fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); + anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); +} + +static VkFormatFeatureFlags +get_image_format_properties(int gen, enum isl_format base, + enum isl_format actual, + struct anv_format_swizzle swizzle) +{ + const struct brw_surface_format_info *info = &surface_formats[actual]; + + if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen) { + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + + if (info->filtering <= gen) + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + /* We can render to swizzled formats. However, if the alpha channel is + * moved, then blending won't work correctly. The PRM tells us + * straight-up not to render to such a surface. + */ + if (info->render_target <= gen && swizzle.a == 3) { + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + + if (info->alpha_blend <= gen && swizzle.a == 3) + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + /* Load/store is determined based on base format. This prevents RGB + * formats from showing up as load/store capable. 
+ */ + if (isl_is_storage_image_format(base)) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + + if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + + return flags; +} + +static VkFormatFeatureFlags +get_buffer_format_properties(int gen, enum isl_format format) +{ + const struct brw_surface_format_info *info = &surface_formats[format]; + + if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen && !isl_format_is_compressed(format)) + flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + if (info->input_vb <= gen) + flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + + if (isl_is_storage_image_format(format)) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + + if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + + return flags; +} + +static void +anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, + VkFormat format, + VkFormatProperties *out_properties) +{ + int gen = physical_device->info->gen * 10; + if (physical_device->info->is_haswell) + gen += 5; + + VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; + if (anv_format_is_depth_or_stencil(&anv_formats[format])) { + tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + if (physical_device->info->gen >= 8) { + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + if (anv_formats[format].has_depth) { + tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } else { + enum isl_format linear_fmt, tiled_fmt; + struct anv_format_swizzle linear_swizzle, tiled_swizzle; + linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, &linear_swizzle); + tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); + + linear = get_image_format_properties(gen, linear_fmt, linear_fmt, + linear_swizzle); + tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, + tiled_swizzle); + buffer = get_buffer_format_properties(gen, linear_fmt); + + /* XXX: We handle 3-channel formats by switching them out for RGBX or + * RGBA formats behind-the-scenes. This works fine for textures + * because the upload process will fill in the extra channel. + * We could also support it for render targets, but it will take + * substantially more work and we have enough RGBX formats to handle + * what most clients will want. 
+ */ + if (linear_fmt != ISL_FORMAT_UNSUPPORTED && + !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && + isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { + tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & + ~VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } + + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = buffer; + + return; +} + + +void anv_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + anv_physical_device_get_format_properties( + physical_device, + format, + pFormatProperties); +} + +VkResult anv_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties* pImageFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + + anv_physical_device_get_format_properties(physical_device, format, + &format_props); + + /* Extract the VkFormatFeatureFlags that are relevant for the queried + * tiling. + */ + if (tiling == VK_IMAGE_TILING_LINEAR) { + format_feature_flags = format_props.linearTilingFeatures; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { + format_feature_flags = format_props.optimalTilingFeatures; + } else { + unreachable("bad VkImageTiling"); + } + + switch (type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + sampleCounts = VK_SAMPLE_COUNT_1_BIT; + break; + case VK_IMAGE_TYPE_2D: + /* FINISHME: Does this really differ for cube maps? The documentation + * for RENDER_SURFACE_STATE suggests so. + */ + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + + if (tiling == VK_IMAGE_TILING_OPTIMAL && + type == VK_IMAGE_TYPE_2D && + (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + +#if 0 + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (anv_format_for_vk_format(format)->has_stencil) { + /* Not yet implemented because copying to a W-tiled surface is crazy + * hard. 
+ */ + anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " + "stencil format"); + goto unsupported; + } + } +#endif + + if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { + /* Nothing to check. */ + } + + if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { + /* Ignore this flag because it was removed from the + * provisional_I_20150910 header. + */ + } + + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. + */ + .maxResourceSize = UINT32_MAX, + }; + + return VK_SUCCESS; + +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArrayLayers = 0, + .sampleCounts = 0, + .maxResourceSize = 0, + }; + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties) +{ + /* Sparse images are not yet supported. */ + *pNumProperties = 0; +} diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c new file mode 100644 index 00000000000..0a7be353327 --- /dev/null +++ b/src/intel/vulkan/anv_gem.c @@ -0,0 +1,358 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. + */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + gem_mmap.handle = gem_handle; + VG_CLEAR(gem_mmap.pad); + gem_mmap.offset = offset; + gem_mmap.size = size; + VG_CLEAR(gem_mmap.addr_ptr); + gem_mmap.flags = flags; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +int +anv_gem_set_caching(struct anv_device *device, + uint32_t gem_handle, uint32_t caching) +{ + struct drm_i915_gem_caching gem_caching; + + VG_CLEAR(gem_caching); + gem_caching.handle = gem_handle; + gem_caching.caching = caching; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + struct drm_i915_gem_set_domain gem_set_domain; + + VG_CLEAR(gem_set_domain); + gem_set_domain.handle = gem_handle; + gem_set_domain.read_domains = read_domains; + gem_set_domain.write_domain = write_domain; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); +} + +/** + * On error, \a timeout_ns holds the remaining time.
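 + * + * A hypothetical caller (not part of this patch) might use it like: + * + *    int64_t timeout_ns = 1000000000;   -- a one-second budget + *    int ret = anv_gem_wait(device, bo->gem_handle, &timeout_ns); + *    if (ret == -1 && errno == ETIME) + *       handle_timeout();               -- timeout_ns now holds what's left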
+ */ +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + struct drm_gem_close close; + int ret; + + struct drm_i915_gem_create gem_create; + VG_CLEAR(gem_create); + gem_create.size = 4096; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { + assert(!"Failed to create GEM BO"); + return false; + } + + bool swizzled = false; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + struct drm_i915_gem_set_tiling set_tiling; + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_create.handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = tiling == I915_TILING_X ? 
512 : 128; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret != 0) { + assert(!"Failed to set BO tiling"); + goto close_and_return; + } + + struct drm_i915_gem_get_tiling get_tiling; + VG_CLEAR(get_tiling); + get_tiling.handle = gem_create.handle; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { + assert(!"Failed to get BO tiling"); + goto close_and_return; + } + + swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; + +close_and_return: + + VG_CLEAR(close); + close.handle = gem_create.handle; + anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + + return swizzled; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return args.handle; +} diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c new file mode 100644 index 00000000000..3204fefb28e --- /dev/null +++ b/src/intel/vulkan/anv_gem_stubs.c @@ -0,0 +1,159 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include <linux/memfd.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "anv_private.h" + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + int fd = memfd_create("fake bo", MFD_CLOEXEC); + if (fd == -1) + return 0; + + assert(fd != 0); + + if (ftruncate(fd, size) == -1) + return 0; + + return fd; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + close(gem_handle); +} + +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + /* Ignore flags, as they're specific to I915_GEM_MMAP. */ + (void) flags; + + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + gem_handle, offset); +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + return -1; +} + +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + return 0; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return 0; +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, + uint32_t caching) +{ + return 0; +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + return 0; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + unreachable("Unused"); +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + unreachable("Unused"); +} + +int +anv_gem_create_context(struct anv_device *device) +{ + unreachable("Unused"); +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + unreachable("Unused"); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + unreachable("Unused"); +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + unreachable("Unused"); +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + unreachable("Unused"); +} diff --git a/src/intel/vulkan/anv_gen_macros.h b/src/intel/vulkan/anv_gen_macros.h new file mode 100644 index 00000000000..ef2ecd55a9b --- /dev/null +++ b/src/intel/vulkan/anv_gen_macros.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +/* Macros for handling per-gen compilation. + * + * The prefixing macros GENX() and genX() automatically prefix whatever you + * give them by GENX_ or genX_ where X is the gen number. + * + * You can declare a function to be used on some range of gens like this: + * + * GENX_FUNC(GEN7, GEN75) void + * genX(my_function_name)(args...) + * { + * // Do stuff + * } + * + * If the file is compiled for any set of gens containing gen7 and gen75, + * the function will effectively only get compiled twice as + * gen7_my_function_name and gen75_my_function_name. The function has to + * be compilable on all gens, but it will become a static inline that gets + * discarded by the compiler on all gens not in range. + * + * You can do pseudo-runtime checks in your function such as + * + * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * // Do something + * } + * + * The contents of the if statement must be valid regardless of gen, but + * the if will get compiled away on every gen where the condition is + * constant false. + * + * For places where you really do have a compile-time conflict, you can + * use preprocessor logic: + * + * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * // Do something + * #endif + * + * However, it is strongly recommended that the former be used whenever + * possible. + */ + +/* Base macro defined on the command line. If we don't have this, we can't + * do anything. + */ +#ifdef ANV_GENx10 + +/* Gen checking macros */ +#define ANV_GEN ((ANV_GENx10) / 10) +#define ANV_IS_HASWELL ((ANV_GENx10) == 75) + +/* Prefixing macros */ +#if (ANV_GENx10 == 70) +# define GENX(X) GEN7_##X +# define genX(x) gen7_##x +#elif (ANV_GENx10 == 75) +# define GENX(X) GEN75_##X +# define genX(x) gen75_##x +#elif (ANV_GENx10 == 80) +# define GENX(X) GEN8_##X +# define genX(x) gen8_##x +#elif (ANV_GENx10 == 90) +# define GENX(X) GEN9_##X +# define genX(x) gen9_##x +#else +# error "Need to add prefixing macros for your gen" +#endif + +/* Macros for comparing gens */ +#if (ANV_GENx10 >= 70) +#define __ANV_GEN_GE_GEN7(T, F) T +#else +#define __ANV_GEN_GE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 <= 70) +#define __ANV_GEN_LE_GEN7(T, F) T +#else +#define __ANV_GEN_LE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 >= 75) +#define __ANV_GEN_GE_GEN75(T, F) T +#else +#define __ANV_GEN_GE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 <= 75) +#define __ANV_GEN_LE_GEN75(T, F) T +#else +#define __ANV_GEN_LE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 >= 80) +#define __ANV_GEN_GE_GEN8(T, F) T +#else +#define __ANV_GEN_GE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 <= 80) +#define __ANV_GEN_LE_GEN8(T, F) T +#else +#define __ANV_GEN_LE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 >= 90) +#define __ANV_GEN_GE_GEN9(T, F) T +#else +#define __ANV_GEN_GE_GEN9(T, F) F +#endif + +#if (ANV_GENx10 <= 90) +#define __ANV_GEN_LE_GEN9(T, F) T +#else +#define __ANV_GEN_LE_GEN9(T, F) F +#endif + +#define __ANV_GEN_IN_RANGE(start, end, T, F) \ + __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) + +/* Declares a function as static inline if it's not in range */ +#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) + +#endif /* ANV_GENx10 */ diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c new file mode 100644 index 00000000000..0a412a3f8c6 --- /dev/null +++ 
b/src/intel/vulkan/anv_image.c @@ -0,0 +1,911 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +/** + * Exactly one bit must be set in \a aspect. + */ +static isl_surf_usage_flags_t +choose_isl_surf_usage(VkImageUsageFlags vk_usage, + VkImageAspectFlags aspect) +{ + isl_surf_usage_flags_t isl_usage = 0; + + /* FINISHME: Support aux surfaces */ + isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; + + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + + if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + isl_usage |= ISL_SURF_USAGE_CUBE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_DEPTH_BIT: + isl_usage |= ISL_SURF_USAGE_DEPTH_BIT; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + isl_usage |= ISL_SURF_USAGE_STENCIL_BIT; + break; + } + } + + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + } + + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Meta implements transfers by rendering into the destination image. */ + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + + return isl_usage; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +static struct anv_surface * +get_surface(struct anv_image *image, VkImageAspectFlags aspect) +{ + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_COLOR_BIT: + return &image->color_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT: + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return &image->stencil_surface; + } +} + +/** + * Initialize the anv_image::*_surface selected by \a aspect. Then update the + * image's memory requirements (that is, the image's size and alignment). + * + * Exactly one bit must be set in \a aspect.
+ */ +static VkResult +make_surface(const struct anv_device *dev, + struct anv_image *image, + const struct anv_image_create_info *anv_info, + VkImageAspectFlags aspect) +{ + const VkImageCreateInfo *vk_info = anv_info->vk_info; + bool ok UNUSED; + + static const enum isl_surf_dim vk_to_isl_surf_dim[] = { + [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, + [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, + [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, + }; + + isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; + if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) + tiling_flags &= ISL_TILING_LINEAR_BIT; + + struct anv_surface *anv_surf = get_surface(image, aspect); + + VkExtent3D extent; + switch (vk_info->imageType) { + case VK_IMAGE_TYPE_1D: + extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; + break; + case VK_IMAGE_TYPE_2D: + extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; + break; + case VK_IMAGE_TYPE_3D: + extent = vk_info->extent; + break; + default: + unreachable("invalid image type"); + } + + image->extent = extent; + + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, + .dim = vk_to_isl_surf_dim[vk_info->imageType], + .format = anv_get_isl_format(vk_info->format, aspect, + vk_info->tiling, NULL), + .width = extent.width, + .height = extent.height, + .depth = extent.depth, + .levels = vk_info->mipLevels, + .array_len = vk_info->arrayLayers, + .samples = vk_info->samples, + .min_alignment = 0, + .min_pitch = 0, + .usage = choose_isl_surf_usage(image->usage, aspect), + .tiling_flags = tiling_flags); + + /* isl_surf_init() will fail only if provided invalid input. Invalid input + * is illegal in Vulkan. + */ + assert(ok); + + anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); + image->size = anv_surf->offset + anv_surf->isl.size; + image->alignment = MAX(image->alignment, anv_surf->isl.alignment); + + return VK_SUCCESS; +} + +/** + * Parameter @a format is required and overrides VkImageCreateInfo::format. + */ +static VkImageUsageFlags +anv_image_get_full_usage(const VkImageCreateInfo *info, + const struct anv_format *format) +{ + VkImageUsageFlags usage = info->usage; + + if (info->samples > 1 && + (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + /* Meta will resolve the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta will transfer from the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* For non-clear transfer operations, meta will transfer to the image by + * binding it as a color attachment, even if the image format is not + * a color format. + */ + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + if (anv_format_is_depth_or_stencil(format)) { + /* vkCmdClearDepthStencilImage() only requires that + * VK_IMAGE_USAGE_TRANSFER_DST_BIT be set. In particular, it does + * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta + * clears the image, though, by binding it as a depthstencil + * attachment. 
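 *
 * E.g. a VK_FORMAT_D32_SFLOAT image created with only TRANSFER_DST
 * usage still gets depth-stencil attachment usage added here so that
 * the meta clear path can bind it.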
+ */ + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + } + + return usage; +} + +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + const VkAllocationCallbacks* alloc, + VkImage *pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + struct anv_image *image = NULL; + const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); + VkResult r; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + anv_assert(pCreateInfo->mipLevels > 0); + anv_assert(pCreateInfo->arrayLayers > 0); + anv_assert(pCreateInfo->samples > 0); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); + anv_assert(pCreateInfo->extent.depth > 0); + + image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->vk_format = pCreateInfo->format; + image->format = format; + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; + image->usage = anv_image_get_full_usage(pCreateInfo, format); + image->tiling = pCreateInfo->tiling; + + if (likely(anv_format_is_color(format))) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_COLOR_BIT); + if (r != VK_SUCCESS) + goto fail; + } else { + if (image->format->has_depth) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_DEPTH_BIT); + if (r != VK_SUCCESS) + goto fail; + } + + if (image->format->has_stencil) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_STENCIL_BIT); + if (r != VK_SUCCESS) + goto fail; + } + } + + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + +fail: + if (image) + anv_free2(&device->alloc, alloc, image); + + return r; +} + +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + .isl_tiling_flags = ISL_TILING_ANY_MASK, + }, + pAllocator, + pImage); +} + +void +anv_DestroyImage(VkDevice _device, VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); +} + +static void +anv_surface_get_subresource_layout(struct anv_image *image, + struct anv_surface *surface, + const VkImageSubresource *subresource, + VkSubresourceLayout *layout) +{ + /* If we are on a non-zero mip level or array slice, we need to + * calculate a real offset. 
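 *
 * For now only the base mip level and array layer are supported, as
 * the asserts below enforce; the general case would need a per-level
 * offset lookup such as isl_surf_get_image_offset_el().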
+ */ + anv_assert(subresource->mipLevel == 0); + anv_assert(subresource->arrayLayer == 0); + + layout->offset = surface->offset; + layout->rowPitch = surface->isl.row_pitch; + layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); + layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); + layout->size = surface->isl.size; +} + +void anv_GetImageSubresourceLayout( + VkDevice device, + VkImage _image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + assert(__builtin_popcount(pSubresource->aspectMask) == 1); + + switch (pSubresource->aspectMask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + anv_surface_get_subresource_layout(image, &image->color_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + anv_surface_get_subresource_layout(image, &image->depth_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + anv_surface_get_subresource_layout(image, &image->stencil_surface, + pSubresource, pLayout); + break; + default: + assert(!"Invalid image aspect"); + } +} + +VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *subresource; + const struct anv_format *view_format_info; + + /* Validate structure type before dereferencing it. */ + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + subresource = &pCreateInfo->subresourceRange; + + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. */ + assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); + + /* Validate subresource. */ + assert(subresource->aspectMask != 0); + assert(subresource->levelCount > 0); + assert(subresource->layerCount > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); + assert(subresource->baseArrayLayer < image->array_size); + assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); + assert(pView); + + const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT + | VK_IMAGE_ASPECT_STENCIL_BIT; + + /* Validate format. 
*/ + if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(!image->format->has_depth); + assert(!image->format->has_stencil); + assert(!view_format_info->has_depth); + assert(!view_format_info->has_stencil); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } else if (subresource->aspectMask & ds_flags) { + assert((subresource->aspectMask & ~ds_flags) == 0); + + if (subresource->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + assert(image->format->has_depth); + assert(view_format_info->has_depth); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + /* FINISHME: Is it legal to have an R8 view of S8? */ + assert(image->format->has_stencil); + assert(view_format_info->has_stencil); + } + } else { + assert(!"bad VkImageSubresourceRange::aspectFlags"); + } + + return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); +} + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + else + gen7_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 8: + gen8_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 9: + gen9_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +static struct anv_state +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + +static bool +has_matching_storage_typed_format(const struct anv_device *device, + enum isl_format format) +{ + return (isl_format_get_layout(format)->bs <= 4 || + (isl_format_get_layout(format)->bs <= 8 && + (device->info.gen >= 8 || device->info.is_haswell)) || + device->info.gen >= 9); +} + +static VkComponentSwizzle +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, + struct anv_format_swizzle format_swizzle) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + swizzle = component; + + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case VK_COMPONENT_SWIZZLE_ONE: + return VK_COMPONENT_SWIZZLE_ONE; + case VK_COMPONENT_SWIZZLE_R: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + case VK_COMPONENT_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + case VK_COMPONENT_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + case VK_COMPONENT_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + default: + unreachable("Invalid swizzle"); + } +} + +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + VkImageViewCreateInfo mCreateInfo; + memcpy(&mCreateInfo, pCreateInfo, 
sizeof(VkImageViewCreateInfo)); + + assert(range->layerCount > 0); + assert(range->baseMipLevel < image->levels); + assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + switch (image->type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + range->layerCount - 1 + <= anv_minify(image->extent.depth, range->baseMipLevel)); + break; + } + + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + iview->image = image; + iview->bo = image->bo; + iview->offset = image->offset + surface->offset + offset; + + iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; + iview->vk_format = pCreateInfo->format; + + struct anv_format_swizzle swizzle; + iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, + image->tiling, &swizzle); + iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R, swizzle); + iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G, swizzle); + iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B, swizzle); + iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A, swizzle); + + iview->base_layer = range->baseArrayLayer; + iview->base_mip = range->baseMipLevel; + + if (!isl_format_is_compressed(iview->format) && + isl_format_is_compressed(image->format->isl_format)) { + /* Scale the ImageView extent by the backing Image. This is used + * internally when an uncompressed ImageView is created on a + * compressed Image. The ImageView can therefore be used for copying + * data from a source Image to a destination Image. 
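 *
 * E.g. with a 4x4-block compressed format, the view's extent is
 * measured in blocks rather than texels: the depth below is divided by
 * the block depth (rounded up) and the width and height are derived
 * from the surface's row and array pitches.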
+ */ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + + iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); + iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + + iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; + iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); + mCreateInfo.subresourceRange.baseMipLevel = 0; + mCreateInfo.subresourceRange.baseArrayLayer = 0; + } else { + iview->level_0_extent.width = image->extent.width; + iview->level_0_extent.height = image->extent.height; + iview->level_0_extent.depth = image->extent.depth; + } + + iview->extent = (VkExtent3D) { + .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), + .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), + .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), + }; + + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->sampler_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + } else { + iview->sampler_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->color_rt_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } else { + iview->color_rt_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + + if (has_matching_storage_typed_format(device, iview->format)) + anv_fill_image_surface_state(device, iview->storage_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); + else + anv_fill_buffer_surface_state(device, iview->storage_surface_state, + ISL_FORMAT_RAW, + iview->offset, + iview->bo->size - iview->offset, 1); + + } else { + iview->storage_surface_state.alloc_size = 0; + } +} + +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL, 0); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyImageView(VkDevice _device, VkImageView _iview, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + + if (iview->color_rt_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->color_rt_surface_state); + } + + if (iview->sampler_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->sampler_surface_state); + } + + if (iview->storage_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->storage_surface_state); + } + + anv_free2(&device->alloc, pAllocator, iview); +} + +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + 
VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->format = format->isl_format; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->range = pCreateInfo->range == VK_WHOLE_SIZE ? + buffer->size - view->offset : pCreateInfo->range; + + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, + format->isl_layout->bs); + } else { + view->surface_state = (struct anv_state){ 0 }; + } + + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + view->storage_surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + enum isl_format storage_format = + has_matching_storage_typed_format(device, view->format) ? + isl_lower_storage_image_format(&device->isl_dev, view->format) : + ISL_FORMAT_RAW; + + anv_fill_buffer_surface_state(device, view->storage_surface_state, + storage_format, + view->offset, view->range, + (storage_format == ISL_FORMAT_RAW ? 1 : + format->isl_layout->bs)); + + } else { + view->storage_surface_state = (struct anv_state){ 0 }; + } + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + + if (view->surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->surface_state); + + if (view->storage_surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->storage_surface_state); + + anv_free2(&device->alloc, pAllocator, view); +} + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) +{ + switch (aspect_mask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + /* Dragons will eat you. + * + * Meta attaches all destination surfaces as color render targets. Guess + * what surface the Meta Dragons really want. + */ + if (image->format->has_depth && image->format->has_stencil) { + return &image->depth_surface; + } else if (image->format->has_depth) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } else { + return &image->color_surface; + } + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + assert(image->format->has_depth); + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(image->format->has_stencil); + return &image->stencil_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + if (image->format->has_depth && image->format->has_stencil) { + /* FINISHME: The Vulkan spec (git a511ba2) requires support for + * combined depth stencil formats. Specifically, it states: + * + * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or + * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. 
+ * + * Image views with both depth and stencil aspects are only valid for + * render target attachments, in which case + * cmd_buffer_emit_depth_stencil() will pick out both the depth and + * stencil surfaces from the underlying surface. + */ + return &image->depth_surface; + } else if (image->format->has_depth) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } + /* fallthrough */ + default: + unreachable("image does not have aspect"); + return NULL; + } +} + +static void +image_param_defaults(struct brw_image_param *param) +{ + memset(param, 0, sizeof *param); + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +void +anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + const struct isl_surf *surf = &view->image->color_surface.isl; + const int cpp = isl_format_get_layout(surf->format)->bs; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + param->size[0] = view->extent.width; + param->size[1] = view->extent.height; + if (surf->dim == ISL_SURF_DIM_3D) { + param->size[2] = view->extent.depth; + } else { + param->size[2] = surf->logical_level0_px.array_len - view->base_layer; + } + + isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, + ¶m->offset[0], ¶m->offset[1]); + + param->stride[0] = cpp; + param->stride[1] = surf->row_pitch / cpp; + + if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { + param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); + param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); + } else { + param->stride[2] = 0; + param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); + } + + switch (surf->tiling) { + case ISL_TILING_LINEAR: + /* image_param_defaults is good enough */ + break; + + case ISL_TILING_X: + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = util_logbase2(512 / cpp); + param->tiling[1] = util_logbase2(8); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + break; + + case ISL_TILING_Y0: + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = util_logbase2(16 / cpp); + param->tiling[1] = util_logbase2(32); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 0xff; + } + break; + + default: + assert(!"Unhandled storage image tiling"); + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? 
+ view->base_mip : 0); +} + +void +anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + param->stride[0] = isl_format_layouts[view->format].bs; + param->size[0] = view->range / param->stride[0]; +} diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c new file mode 100644 index 00000000000..d95d9afe8cf --- /dev/null +++ b/src/intel/vulkan/anv_intel.c @@ -0,0 +1,100 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +VkResult anv_CreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + VkImage image_h; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + anv_image_create(_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = pCreateInfo->strideInBytes, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->format, + .extent = pCreateInfo->extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, &image_h); + + image = anv_image_from_handle(image_h); + image->bo = &mem->bo; + image->offset = 0; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + + fail: 
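+ /* The only failure point above is anv_gem_fd_to_handle(), so only the
+ * memory object needs to be cleaned up here.
+ */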
+ anv_free2(&device->alloc, pAllocator, mem); + + return result; +} diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c new file mode 100644 index 00000000000..82944ea1a92 --- /dev/null +++ b/src/intel/vulkan/anv_meta.c @@ -0,0 +1,169 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" + +struct anv_render_pass anv_meta_dummy_renderpass = {0}; + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); + + state->dynamic_mask = dynamic_mask; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_mask); +} + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_mask); + cmd_buffer->state.dirty |= state->dynamic_mask; + + /* Since we've used the pipeline with the VS disabled, set + * need_query_wa. See CmdBeginQuery. + */ + cmd_buffer->state.need_query_wa = true; +} + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image) +{ + switch (image->type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("bad VkImageViewType"); + } +} + +/** + * When creating a destination VkImageView, this function provides the needed + * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. 
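 *
 * For 1D and 2D images this is simply the subresource's baseArrayLayer;
 * for 3D images the destination z offset is reused as the layer index
 * (see the HACK note in the function body).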
+ */ +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->baseArrayLayer; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. + */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static void * +meta_alloc(void* _device, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void * +meta_realloc(void* _device, void *original, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnReallocation(device->alloc.pUserData, original, + size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void +meta_free(void* _device, void *data) +{ + struct anv_device *device = _device; + return device->alloc.pfnFree(device->alloc.pUserData, data); +} + +VkResult +anv_device_init_meta(struct anv_device *device) +{ + VkResult result; + + device->meta_state.alloc = (VkAllocationCallbacks) { + .pUserData = device, + .pfnAllocation = meta_alloc, + .pfnReallocation = meta_realloc, + .pfnFree = meta_free, + }; + + result = anv_device_init_meta_clear_state(device); + if (result != VK_SUCCESS) + goto fail_clear; + + result = anv_device_init_meta_resolve_state(device); + if (result != VK_SUCCESS) + goto fail_resolve; + + result = anv_device_init_meta_blit_state(device); + if (result != VK_SUCCESS) + goto fail_blit; + + return VK_SUCCESS; + +fail_blit: + anv_device_finish_meta_resolve_state(device); +fail_resolve: + anv_device_finish_meta_clear_state(device); +fail_clear: + return result; +} + +void +anv_device_finish_meta(struct anv_device *device) +{ + anv_device_finish_meta_resolve_state(device); + anv_device_finish_meta_clear_state(device); + anv_device_finish_meta_blit_state(device); +} diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h new file mode 100644 index 00000000000..d33e9e6d8ba --- /dev/null +++ b/src/intel/vulkan/anv_meta.h @@ -0,0 +1,75 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ANV_META_VERTEX_BINDING_COUNT 2 + +struct anv_meta_saved_state { + struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + + /** + * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic + * state. + */ + uint32_t dynamic_mask; + struct anv_dynamic_state dynamic; +}; + +VkResult anv_device_init_meta_clear_state(struct anv_device *device); +void anv_device_finish_meta_clear_state(struct anv_device *device); + +VkResult anv_device_init_meta_resolve_state(struct anv_device *device); +void anv_device_finish_meta_resolve_state(struct anv_device *device); + +VkResult anv_device_init_meta_blit_state(struct anv_device *device); +void anv_device_finish_meta_blit_state(struct anv_device *device); + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask); + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer); + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image); + +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c new file mode 100644 index 00000000000..07ebcbc06b1 --- /dev/null +++ b/src/intel/vulkan/anv_meta_blit.c @@ -0,0 +1,1442 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_meta.h" +#include "nir/nir_builder.h" + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static nir_shader * +build_nir_vertex_shader(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_tex_pos"); + tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_tex_pos"); + tex_pos_out->data.location = VARYING_SLOT_VAR0; + tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, tex_pos_out, tex_pos_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(vec4)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *saved_state) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. 
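 *
 * E.g. for a compressed format with 4x4 blocks, a texel offset of
 * (8, 4, 0) corresponds to an element offset of (2, 1, 0).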
+ */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorSet set; + 
anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable("bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *saved_state) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkFormat +vk_format_for_size(int bs) +{ + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. 
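 *
 * E.g. a 12-byte block becomes R32G32B32_UINT and a 16-byte block
 * R32G32B32A32_UINT, so the channels of the two views line up element
 * for element.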
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t 
dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + VkFormat copy_format = vk_format_for_size(bs); + + /* This is the maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. + */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } +} + +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. 
Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatible UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get the + * format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. + */ + enum isl_format linear_format = anv_get_isl_format(format, aspect, + VK_IMAGE_TILING_LINEAR, + NULL); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. 
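 *
 * Anything else is invalid usage, so the assert below is the only
 * handling the driver needs.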
+ */ + assert(src_image->samples == dest_image->samples); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = pRegions[r].dstSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, + .z = 0, + }; + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].dstSubresource.layerCount; + } + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_FILTER_NEAREST); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. 
+ */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + + anv_finishme("respect VkFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = 0, + }; + + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + + const uint32_t dest_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); + + if (pRegions[r].srcSubresource.layerCount > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_array_slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffsets[0], src_extent, + dest_image, &dest_iview, + dest_offset, dest_extent, + filter); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static struct anv_image * +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + VkImageType image_type, + const VkAllocationCallbacks *alloc, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = 
VK_IMAGE_TYPE_2D, + .format = buffer_format, + .extent = extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, alloc, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return image; +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(dest_image, aspect); + + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + uint32_t img_x = 0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->isl_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = anv_meta_get_view_type(dest_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, + .layerCount = 1 + }, + }, + cmd_buffer, img_o); + + const 
VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + (VkOffset3D){0, 0, 0}, + img_extent_el, + dest_image, + &dest_iview, + dest_offset_el, + img_extent_el, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + src_image->offset += src_image->extent.width * + src_image->extent.height * + src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.layerCount == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.layerCount; + } + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, 
+ VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * + src_image->format->isl_layout->bs; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void +anv_device_finish_meta_blit_state(struct anv_device *device) +{ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} + +VkResult +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for blitting, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. 
+ */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + 
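+ /* The three pipelines created above hold their own compiled copies of + * the shaders, so the temporary NIR shaders are no longer needed. + */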
ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c new file mode 100644 index 00000000000..739ae09582c --- /dev/null +++ b/src/intel/vulkan/anv_meta_clear.c @@ -0,0 +1,1098 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** Vertex attributes for color clears. */ +struct color_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + VkClearColorValue color; +}; + +/** Vertex attributes for depthstencil clears. 
*/ +struct depthstencil_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ +}; + +static void +meta_clear_begin(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR) | + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_clear_end(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static void +build_color_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs, + uint32_t frag_output) +{ + nir_builder vs_b; + nir_builder fs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_variable *vs_in_color = + nir_variable_create(vs_b.shader, nir_var_shader_in, color_type, + "a_color"); + vs_in_color->data.location = VERT_ATTRIB_GENERIC1; + + nir_variable *vs_out_color = + nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, + "v_color"); + vs_out_color->data.location = VARYING_SLOT_VAR0; + vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *fs_in_color = + nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, + "v_color"); + fs_in_color->data.location = vs_out_color->data.location; + fs_in_color->data.interpolation = vs_out_color->data.interpolation; + + nir_variable *fs_out_color = + nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, + "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + nir_copy_var(&vs_b, vs_out_color, vs_in_color); + nir_copy_var(&fs_b, fs_out_color, fs_in_color); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t samples, + struct nir_shader *vs_nir, + struct nir_shader *fs_nir, + const VkPipelineVertexInputStateCreateInfo *vi_state, + const VkPipelineDepthStencilStateCreateInfo *ds_state, + const VkPipelineColorBlendStateCreateInfo *cb_state, + const VkAllocationCallbacks *alloc, + bool use_repclear, + struct anv_pipeline **pipeline) +{ + VkDevice device_h = anv_device_to_handle(device); + VkResult result; + + struct anv_shader_module vs_m = { .nir = vs_nir }; + struct anv_shader_module fs_m = { .nir = fs_nir }; + + VkPipeline pipeline_h = VK_NULL_HANDLE; + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = fs_nir ? 
2 : 1, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs_m), + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_m), + .pName = "main", + }, + }, + .pVertexInputState = vi_state, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = NULL, /* dynamic */ + .scissorCount = 1, + .pScissors = NULL, /* dynamic */ + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = false, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = samples, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { ~0 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + /* The meta clear pipeline declares all state as dynamic. + * As a consequence, vkCmdBindPipeline writes no dynamic state + * to the cmd buffer. Therefore, at the end of the meta clear, + * we need only restore dynamic state that was set via vkCmdSet*. 
+ */ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = MAX_RTS, + .use_repclear = use_repclear, + .disable_viewport = true, + .disable_vs = true, + .use_rectlist = true + }, + alloc, + &pipeline_h); + + ralloc_free(vs_nir); + ralloc_free(fs_nir); + + *pipeline = anv_pipeline_from_handle(pipeline_h); + + return result; +} + +static VkResult +create_color_pipeline(struct anv_device *device, + uint32_t samples, + uint32_t frag_output, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_color_shaders(&vs_nir, &fs_nir, frag_output); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct color_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct color_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, position), + }, + { + /* Color */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, color), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 }; + blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) { + .blendEnable = false, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = MAX_RTS, + .pAttachments = blend_attachment_state + }; + + /* Disable repclear because we do not want the compiler to replace the + * shader. We need the shader to write to the specified color attachment, + * but the repclear shader writes to all color attachments. 
+ */ + return + create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ false, pipeline); +} + +static void +destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) +{ + if (!pipeline) + return; + + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline), + &device->meta_state.alloc); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + destroy_pipeline(device, state->clear[i].color_pipelines[j]); + } + + destroy_pipeline(device, state->clear[i].depth_only_pipeline); + destroy_pipeline(device, state->clear[i].stencil_only_pipeline); + destroy_pipeline(device, state->clear[i].depthstencil_pipeline); + } +} + +static void +emit_color_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att]; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + struct anv_pipeline *pipeline = + device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; + VkClearColorValue clear_value = clear_att->clearValue.color; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + + assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); + assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(clear_att->colorAttachment < subpass->color_count); + + const struct color_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + .color = clear_value, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + 
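+ /* These pipelines are created with use_rectlist, so the three vertices + * emitted above are drawn as a 3DPRIM_RECTLIST: three corners of the + * clear rectangle, with the fourth corner derived by the hardware. + */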
ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + + +static void +build_depthstencil_shader(struct nir_shader **out_vs) +{ + nir_builder vs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + + *out_vs = vs_b.shader; +} + +static VkResult +create_depthstencil_pipeline(struct anv_device *device, + VkImageAspectFlags aspects, + uint32_t samples, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + + build_depthstencil_shader(&vs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct depthstencil_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct depthstencil_clear_vattrs, position), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthBoundsTestEnable = false, + .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), + .front = { + .passOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .writeMask = UINT32_MAX, + .reference = 0, /* dynamic */ + }, + .back = { 0 /* don't care */ }, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 0, + .pAttachments = NULL, + }; + + return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ true, pipeline); +} + +static void +emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_meta_state *meta_state = &device->meta_state; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t pass_att = subpass->depth_stencil_attachment; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + + VkCommandBuffer 
cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + + assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); + assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || + aspects == VK_IMAGE_ASPECT_STENCIL_BIT || + aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + assert(pass_att != VK_ATTACHMENT_UNUSED); + + const struct depthstencil_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + + /* Ignored when clearing only stencil. */ + .minDepth = clear_value.depth, + .maxDepth = clear_value.depth, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, + clear_value.stencil); + } + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + struct anv_pipeline *pipeline; + switch (aspects) { + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + pipeline = meta_state->clear[samples_log2].depth_only_pipeline; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + anv_pipeline_to_handle(pipeline)); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + +VkResult +anv_device_init_meta_clear_state(struct anv_device *device) +{ + VkResult res; + struct anv_meta_state *state = &device->meta_state; + + zero(device->meta_state.clear); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + uint32_t samples = 1 << i; + + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + res = create_color_pipeline(device, samples, /* frag_output */ j, + &state->clear[i].color_pipelines[j]); + if (res != VK_SUCCESS) + goto fail; + } + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT, samples, + &state->clear[i].depth_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].stencil_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].depthstencil_pipeline); + if (res != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; 
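+ /* meta_state.clear was zeroed above and destroy_pipeline() ignores NULL + * entries, so anv_device_finish_meta_clear_state() can safely tear down + * a partially initialized state on failure. + */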
+ +fail: + anv_device_finish_meta_clear_state(device); + return res; +} + +/** + * The parameters mean the same as those in vkCmdClearAttachments. + */ +static void +emit_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_color_clear(cmd_buffer, clear_att, clear_rect); + } else { + assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + } +} + +static bool +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (cmd_state->attachments[a].pending_clear_aspects) { + return true; + } + } + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + return true; + } + + return false; +} + +/** + * Emit any pending attachment clears for the current subpass. + * + * @see anv_attachment_state::pending_clear_aspects + */ +void +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_meta_saved_state saved_state; + + if (!subpass_needs_clear(cmd_buffer)) + return; + + meta_clear_begin(&saved_state, cmd_buffer); + + if (cmd_state->framebuffer->layers > 1) + anv_finishme("clearing multi-layer framebuffer"); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + + if (!cmd_state->attachments[a].pending_clear_aspects) + continue; + + assert(cmd_state->attachments[a].pending_clear_aspects == + VK_IMAGE_ASPECT_COLOR_BIT); + + VkClearAttachment clear_att = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, /* Use attachment index relative to subpass */ + .clearValue = cmd_state->attachments[a].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[a].pending_clear_aspects = 0; + } + + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + + VkClearAttachment clear_att = { + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, + .clearValue = cmd_state->attachments[ds].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[ds].pending_clear_aspects = 0; + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout image_layout, + const VkClearValue *clear_value, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + + for (uint32_t l = 0; l < range->levelCount; ++l) { + for (uint32_t s = 0; s < range->layerCount; ++s) { + struct anv_image_view iview; + anv_image_view_init(&iview, 
cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(image), + .viewType = anv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + subpass_desc.colorAttachmentCount = 1; + subpass_desc.pColorAttachments = &att_ref; + } else { + subpass_desc.pDepthStencilAttachment = &att_ref; + } + + VkRenderPass pass; + anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &att_desc, + .subpassCount = 1, + .pSubpasses = &subpass_desc, + }, + &cmd_buffer->pool->alloc, + &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(DestroyRenderPass)(device_h, pass, + &cmd_buffer->pool->alloc); + ANV_CALL(DestroyFramebuffer)(device_h, fb, + &cmd_buffer->pool->alloc); + } + } + } +} + +void anv_CmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, 
imageLayout, + (const VkClearValue *) pColor, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pDepthStencil, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + /* FINISHME: We can do better than this dumb loop. It thrashes too much + * state. + */ + for (uint32_t a = 0; a < attachmentCount; ++a) { + for (uint32_t r = 0; r < rectCount; ++r) { + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); + } + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat fill_format, uint32_t data) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = fill_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }; + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + const VkClearValue clear_value = { + .color = { + .uint32 = { data, data, data, data } + } + }; + + const VkImageSubresourceRange range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &clear_value, 1, &range); +} + +void anv_CmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + VkFormat format; + int bs; + if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((fillSize & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + /* This is the maximum possible width/height our HW can handle */ + const uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; + while (fillSize >= max_fill_size) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, max_surface_dim, format, data); + fillSize -= max_fill_size; + dstOffset += max_fill_size; + } + + uint64_t height = fillSize / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + const uint64_t rect_fill_size = height * max_surface_dim * bs; + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, height, format, data); + fillSize -= rect_fill_size; + dstOffset += rect_fill_size; + } + + if (fillSize != 0) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + fillSize / bs, 1, format, data); + } + + meta_clear_end(&saved_state, cmd_buffer); +} diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c new file mode 100644 index 00000000000..ea5020c5f24 --- /dev/null +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -0,0 +1,867 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** + * Vertex attributes used by all pipelines. + */ +struct vertex_attrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; +}; + +static void +meta_resolve_save(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_resolve_restore(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkPipeline * +get_pipeline_h(struct anv_device *device, uint32_t samples) +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ + + assert(samples >= 2); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + + return &device->meta_state.resolve.pipelines[i]; +} + +static nir_shader * +build_nir_vs(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + nir_builder b; + nir_variable *a_position; + nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); + + a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_position"); + a_position->data.location = VERT_ATTRIB_GENERIC0; + + v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "gl_Position"); + v_position->data.location = VARYING_SLOT_POS; + + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); + + return b.shader; +} + +static nir_shader * +build_nir_fs(uint32_t num_samples) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + const struct glsl_type *sampler2DMS = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, + /*is_shadow*/ false, + /*is_array*/ false, + GLSL_TYPE_FLOAT); + + nir_builder b; + nir_variable *u_tex; /* uniform sampler */ + nir_variable *v_position; /* vec4, varying fragment position */ + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ + nir_variable *f_color; /* vec4, fragment output color */ + nir_ssa_def *accum; /* vec4, accumulation of sample values */ + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs_samples%02d", + num_samples); + + u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, + "u_tex"); + u_tex->data.descriptor_set = 0; + u_tex->data.binding = 0; + + v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_position"); + v_position->data.location = VARYING_SLOT_POS; + v_position->data.origin_upper_left = true; + + v_tex_position =
nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "f_color"); + f_color->data.location = FRAG_RESULT_DATA0; + + accum = nir_imm_vec4(&b, 0, 0, 0, 0); + + nir_ssa_def *tex_position_ivec = + nir_f2i(&b, nir_load_var(&b, v_tex_position)); + + for (uint32_t i = 0; i < num_samples; ++i) { + nir_tex_instr *tex; + + tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->texture = nir_deref_var_create(tex, u_tex); + tex->sampler = nir_deref_var_create(tex, u_tex); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src = nir_src_for_ssa(tex_position_ivec); + tex->src[0].src_type = nir_tex_src_coord; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 3; + nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + accum = nir_fadd(&b, accum, &tex->dest.ssa); + } + + accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); + nir_store_var(&b, f_color, accum, /*writemask*/ 4); + + return b.shader; +} + +static VkResult +create_pass(struct anv_device *device) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + result = anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .samples = 1, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + alloc, + &device->meta_state.resolve.pass); + + return result; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t num_samples, + VkShaderModule vs_module_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + + struct anv_shader_module fs_module = { + .nir = build_nir_fs(num_samples), + }; + + if (!fs_module.nir) { + /* XXX: Need more accurate error */ + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto cleanup; + } + + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vs_module_h, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_module), + .pName = "main", + }, + }, + .pVertexInputState = 
&(VkPipelineVertexInputStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct vertex_attrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct vertex_attrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, position), + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, + }, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { 0x1 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = device->meta_state.resolve.pipeline_layout, + .renderPass = device->meta_state.resolve.pass, + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.alloc, + get_pipeline_h(device, num_samples)); + if (result != VK_SUCCESS) + goto cleanup; + + goto cleanup; + +cleanup: + ralloc_free(fs_module.nir); + return result; +} + +void +anv_device_finish_meta_resolve_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + VkDevice device_h = anv_device_to_handle(device); + VkRenderPass pass_h = device->meta_state.resolve.pass; + VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; + VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout; + const 
VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + if (pass_h) + ANV_CALL(DestroyRenderPass)(device_h, pass_h, + &device->meta_state.alloc); + + if (pipeline_layout_h) + ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); + + if (ds_layout_h) + ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { + VkPipeline pipeline_h = state->resolve.pipelines[i]; + + if (pipeline_h) { + ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); + } + } +} + +VkResult +anv_device_init_meta_resolve_state(struct anv_device *device) +{ + VkResult res = VK_SUCCESS; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + const isl_sample_count_mask_t sample_count_mask = + isl_device_get_sample_counts(&device->isl_dev); + + zero(device->meta_state.resolve); + + struct anv_shader_module vs_module = { .nir = build_nir_vs() }; + if (!vs_module.nir) { + /* XXX: Need more accurate error */ + res = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); + + res = anv_CreateDescriptorSetLayout(device_h, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + }, + }, + alloc, + &device->meta_state.resolve.ds_layout); + if (res != VK_SUCCESS) + goto fail; + + res = anv_CreatePipelineLayout(device_h, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + alloc, + &device->meta_state.resolve.pipeline_layout); + if (res != VK_SUCCESS) + goto fail; + + res = create_pass(device); + if (res != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { + + uint32_t sample_count = 1 << (1 + i); + if (!(sample_count_mask & sample_count)) + continue; + + res = create_pipeline(device, sample_count, vs_module_h); + if (res != VK_SUCCESS) + goto fail; + } + + goto cleanup; + +fail: + anv_device_finish_meta_resolve_state(device); + +cleanup: + ralloc_free(vs_module.nir); + + return res; +} + +static void +emit_resolve(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src_iview, + const VkOffset2D *src_offset, + struct anv_image_view *dest_iview, + const VkOffset2D *dest_offset, + const VkExtent2D *resolve_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice device_h = anv_device_to_handle(device); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image *src_image = src_iview->image; + VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; + + const struct vertex_attrs vertex_data[3] = { + { + .vue_header = {0}, + .position = { + dest_offset->x + resolve_extent->width, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x + resolve_extent->width, + src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x, + 
src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y, + }, + .tex_position = { + src_offset->x, + src_offset->y, + }, + }, + }; + + struct anv_state vertex_mem = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, + sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, + .offset = vertex_mem.offset, + }; + + VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); + + anv_CmdBindVertexBuffers(cmd_buffer_h, + /*firstBinding*/ 0, + /*bindingCount*/ 1, + (VkBuffer[]) { vertex_buffer_h }, + (VkDeviceSize[]) { 0 }); + + VkSampler sampler_h; + ANV_CALL(CreateSampler)(device_h, + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0, + .anisotropyEnable = false, + .compareEnable = false, + .minLod = 0.0, + .maxLod = 0.0, + .unnormalizedCoordinates = false, + }, + &cmd_buffer->pool->alloc, + &sampler_h); + + VkDescriptorSet desc_set_h; + anv_AllocateDescriptorSets(device_h, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool_h, + .descriptorSetCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + &desc_set_h); + + ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); + + anv_UpdateDescriptorSets(device_h, + /*writeCount*/ 1, + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = desc_set_h, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler_h, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + }, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, + /*firstViewport*/ 0, + /*viewportCount*/ 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, + /*firstScissor*/ 0, + /*scissorCount*/ 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = (VkExtent2D) { fb->width, fb->height }, + }, + }); + + VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); + ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); + + if (cmd_buffer->state.pipeline != pipeline) { + anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + anv_CmdBindDescriptorSets(cmd_buffer_h, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.resolve.pipeline_layout, + /*firstSet*/ 0, + /* setCount */ 1, + (VkDescriptorSet[]) { + desc_set_h, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); + + /* All objects below are consumed by the draw call. We may safely destroy + * them. 
+ */ + anv_descriptor_set_destroy(device, desc_set); + anv_DestroySampler(device_h, sampler_h, + &cmd_buffer->pool->alloc); +} + +void anv_CmdResolveImage( + VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve* regions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); + ANV_FROM_HANDLE(anv_image, src_image, src_image_h); + ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); + struct anv_device *device = cmd_buffer->device; + struct anv_meta_saved_state state; + VkDevice device_h = anv_device_to_handle(device); + + meta_resolve_save(&state, cmd_buffer); + + assert(src_image->samples > 1); + assert(dest_image->samples == 1); + + if (src_image->samples >= 16) { + /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the + * glBlitFramebuffer workaround for samples >= 16. + */ + anv_finishme("vkCmdResolveImage: need interpolation workaround when " + "samples >= 16"); + } + + if (src_image->array_size > 1) + anv_finishme("vkCmdResolveImage: multisample array images"); + + for (uint32_t r = 0; r < region_count; ++r) { + const VkImageResolve *region = ®ions[r]; + + /* From the Vulkan 1.0 spec: + * + * - The aspectMask member of srcSubresource and dstSubresource must + * only contain VK_IMAGE_ASPECT_COLOR_BIT + * + * - The layerCount member of srcSubresource and dstSubresource must + * match + */ + assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->srcSubresource.layerCount == + region->dstSubresource.layerCount); + + const uint32_t src_base_layer = + anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, + ®ion->srcOffset); + + const uint32_t dest_base_layer = + anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, + ®ion->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; + ++layer) { + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image_h, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image_h, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb_h; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dest_iview), + }, + .width = anv_minify(dest_image->extent.width, + region->dstSubresource.mipLevel), + .height = anv_minify(dest_image->extent.height, + region->dstSubresource.mipLevel), + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb_h); + + 
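+ /* Resolve this array layer: begin a one-subpass render pass instance on + * the single-attachment framebuffer created above; emit_resolve() then + * binds the multisampled source layer as a combined image/sampler and + * averages its samples in the fragment shader. + */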
ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.resolve.pass, + .framebuffer = fb_h, + .renderArea = { + .offset = { + region->dstOffset.x, + region->dstOffset.y, + }, + .extent = { + region->extent.width, + region->extent.height, + } + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + emit_resolve(cmd_buffer, + &src_iview, + &(VkOffset2D) { + .x = region->srcOffset.x, + .y = region->srcOffset.y, + }, + &dest_iview, + &(VkOffset2D) { + .x = region->dstOffset.x, + .y = region->dstOffset.y, + }, + &(VkExtent2D) { + .width = region->extent.width, + .height = region->extent.height, + }); + + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + anv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); + } + } + + meta_resolve_restore(&state, cmd_buffer); +} + +/** + * Emit any needed resolves for the current subpass. + */ +void +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_meta_saved_state saved_state; + + /* FINISHME(perf): Skip clears for resolve attachments. + * + * From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a resolve + * attachment, then the loadOp is effectively ignored as the resolve is + * guaranteed to overwrite all pixels in the render area. + */ + + if (!subpass->has_resolve) + return; + + meta_resolve_save(&saved_state, cmd_buffer); + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i]; + uint32_t dest_att = subpass->resolve_attachments[i]; + + if (dest_att == VK_ATTACHMENT_UNUSED) + continue; + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dest_iview = fb->attachments[dest_att]; + + struct anv_subpass resolve_subpass = { + .color_count = 1, + .color_attachments = (uint32_t[]) { dest_att }, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); + + /* Subpass resolves must respect the render area. We can ignore the + * render area here because vkCmdBeginRenderPass set the render area + * with 3DSTATE_DRAWING_RECTANGLE. + * + * XXX(chadv): Does the hardware really respect + * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST? 
+ */ + emit_resolve(cmd_buffer, + src_iview, + &(VkOffset2D) { 0, 0 }, + dest_iview, + &(VkOffset2D) { 0, 0 }, + &(VkExtent2D) { fb->width, fb->height }); + } + + cmd_buffer->state.subpass = subpass; + meta_resolve_restore(&saved_state, cmd_buffer); +} diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h new file mode 100644 index 00000000000..a7ea3eb0e28 --- /dev/null +++ b/src/intel/vulkan/anv_nir.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "nir/nir.h" +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); + +void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); +void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c new file mode 100644 index 00000000000..e71a8ffb1f4 --- /dev/null +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c @@ -0,0 +1,171 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + const struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + unsigned block_idx_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo: + block_idx_src = 1; + break; + default: + continue; /* the loop */ + } + + nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; + assert(res_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); + assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + unsigned set = res_intrin->const_index[0]; + unsigned binding = res_intrin->const_index[1]; + + set_layout = state->layout->set[set].layout; + if (set_layout->binding[binding].dynamic_offset_index < 0) + continue; + + b->cursor = nir_before_instr(&intrin->instr); + + /* First, we need to generate the uniform load for the buffer offset */ + uint32_t index = state->layout->set[set].dynamic_offset_start + + set_layout->binding[binding].dynamic_offset_index; + + nir_intrinsic_instr *offset_load = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 2; + offset_load->const_index[0] = state->indices_start + index * 8; + offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, + nir_imm_int(b, 8))); + + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); + nir_builder_instr_insert(b, &offset_load->instr); + + nir_src *offset_src = nir_get_io_offset_src(intrin); + nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, + &offset_load->dest.ssa); + + /* In order to avoid out-of-bounds access, we predicate */ + nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), + offset_src->ssa); + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(pred); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + nir_instr_remove(&intrin->instr); + *offset_src = nir_src_for_ssa(new_offset); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); + + if (intrin->intrinsic != nir_intrinsic_store_ssbo) { + /* It's a load, we need a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + intrin->num_components, NULL); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); + src1->pred = exec_node_data(nir_block, tnode, cf_node.node); + src1->src = nir_src_for_ssa(&intrin->dest.ssa); + exec_list_push_tail(&phi->srcs, &src1->node); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, + (nir_const_value) { .u = { 0, 0, 0, 0 } }); + + nir_phi_src *src2 = ralloc(phi, nir_phi_src); + struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); + src2->pred = exec_node_data(nir_block, enode, cf_node.node); + src2->src = nir_src_for_ssa(zero); + 
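+ /* With both phi sources in place, every use of the original load result + * is rewritten to read the phi instead, so an out-of-range dynamic + * offset yields zero rather than an out-of-bounds read. + */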
exec_list_push_tail(&phi->srcs, &src2->node); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&phi->dest.ssa)); + + nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); + } + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct apply_dynamic_offsets_state state = { + .shader = shader, + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) + return; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].range; + } + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; +} diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c new file mode 100644 index 00000000000..c58a93878ee --- /dev/null +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -0,0 +1,394 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "program/prog_parameter.h" +#include "nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + struct { + BITSET_WORD *used; + uint8_t *surface_offsets; + uint8_t *sampler_offsets; + uint8_t *image_offsets; + } set[MAX_SETS]; +}; + +static void +add_binding(struct apply_pipeline_layout_state *state, + uint32_t set, uint32_t binding) +{ + BITSET_SET(state->set[set].used, binding); +} + +static void +add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var) +{ + add_binding(state, var->data.descriptor_set, var->data.binding); +} + +static bool +get_used_bindings_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + add_binding(state, nir_intrinsic_desc_set(intrin), + nir_intrinsic_binding(intrin)); + break; + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_size: + case nir_intrinsic_image_samples: + add_var_binding(state, intrin->variables[0]->var); + break; + + default: + break; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + assert(tex->texture); + add_var_binding(state, tex->texture->var); + if (tex->sampler) + add_var_binding(state, tex->sampler->var); + break; + } + default: + continue; + } + } + + return true; +} + +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); + + uint32_t surface_index = state->set[set].surface_offsets[binding]; + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[0]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); +} + +static void +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) +{ + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + *const_index += deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + 
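+ /* nir_instr_move_src kept each source's SSA use list intact while it + * was transferred, so the old array could simply be freed above before + * being swapped for the enlarged one. + */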
tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + tex->src[tex->num_srcs].src_type = src_type; + tex->num_srcs++; + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); + } + } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->texture); + + unsigned set = tex->texture->var->data.descriptor_set; + unsigned binding = tex->texture->var->data.binding; + tex->texture_index = state->set[set].surface_offsets[binding]; + lower_tex_deref(tex, tex->texture, &tex->texture_index, + nir_tex_src_texture_offset, state); + + if (tex->sampler) { + unsigned set = tex->sampler->var->data.descriptor_set; + unsigned binding = tex->sampler->var->data.binding; + tex->sampler_index = state->set[set].surface_offsets[binding]; + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + } + + /* The backend only ever uses this to mark used surfaces. We don't care + * about that little optimization so it just needs to be non-zero. + */ + tex->texture_array_size = 1; + + cleanup_tex_deref(tex, tex->texture); + if (tex->sampler) + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + lower_res_index_intrinsic(intrin, state); + } + break; + } + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + break; + default: + continue; + } + } + + return true; +} + +static void +setup_vec4_uniform_value(const union gl_constant_value **params, + const union gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + +void +anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct anv_pipeline_layout *layout = pipeline->layout; + + struct apply_pipeline_layout_state state = { + .shader = shader, + }; + + void *mem_ctx = ralloc_context(NULL); + + for (unsigned s = 0; s < layout->num_sets; s++) { + const unsigned count = layout->set[s].layout->binding_count; + const unsigned words = BITSET_WORDS(count); + state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); + state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); + } + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, get_used_bindings_block, &state); + } + + 
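+ /* Count the binding table surfaces and samplers this stage will need. + * For example, if the stage uses set 0 = { binding 0: a two-element + * combined image/sampler array, binding 1: a UBO }, the loop below ends + * up with surface_count == 3 and sampler_count == 2, and the flattened + * surface_to_descriptor map built further down becomes + * { {0,0}, {0,1}, {0,2} }. + */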
struct anv_pipeline_bind_map map = { + .surface_count = 0, + .sampler_count = 0, + }; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) + map.surface_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) + map.sampler_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) + map.image_count += set_layout->binding[b].array_size; + } + } + + map.surface_to_descriptor = + malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); + map.sampler_to_descriptor = + malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); + + pipeline->bindings[shader->stage] = map; + + unsigned surface = 0; + unsigned sampler = 0; + unsigned image = 0; + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; + + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { + state.set[set].surface_offsets[b] = surface; + for (unsigned i = 0; i < array_size; i++) { + map.surface_to_descriptor[surface + i].set = set; + map.surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { + state.set[set].sampler_offsets[b] = sampler; + for (unsigned i = 0; i < array_size; i++) { + map.sampler_to_descriptor[sampler + i].set = set; + map.sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) { + state.set[set].image_offsets[b] = image; + image += array_size; + } + } + } + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + if (map.image_count > 0) { + nir_foreach_variable(var, &shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * storred in the uniform. 
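+ * Each image takes BRW_IMAGE_PARAM_SIZE uniform entries covering its + * surface index, offset, size, stride, tiling and swizzling; the + * driver_location set just below points at the start of that per-image + * block.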
+ */ + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned image_index = state.set[set].image_offsets[binding]; + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE * 4; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < map.image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const union gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const union gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const union gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const union gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const union gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const union gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; + } +} diff --git a/src/intel/vulkan/anv_nir_lower_push_constants.c b/src/intel/vulkan/anv_nir_lower_push_constants.c new file mode 100644 index 00000000000..53cd3d73793 --- /dev/null +++ b/src/intel/vulkan/anv_nir_lower_push_constants.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" + +struct lower_push_constants_state { + nir_shader *shader; + bool is_scalar; +}; + +static bool +lower_push_constants_block(nir_block *block, void *void_state) +{ + struct lower_push_constants_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* TODO: Handle indirect push constants */ + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + /* This wont work for vec4 stages. 
*/ + assert(state->is_scalar); + + assert(intrin->const_index[0] % 4 == 0); + assert(intrin->const_index[1] == 128); + + /* We just turn them into uniform loads with the appropriate offset */ + intrin->intrinsic = nir_intrinsic_load_uniform; + } + + return true; +} + +void +anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) +{ + struct lower_push_constants_state state = { + .shader = shader, + .is_scalar = is_scalar, + }; + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, lower_push_constants_block, &state); + } + + assert(shader->num_uniforms % 4 == 0); + if (is_scalar) + shader->num_uniforms /= 4; + else + shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); +} diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c new file mode 100644 index 00000000000..d07e9fec6cc --- /dev/null +++ b/src/intel/vulkan/anv_pass.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +VkResult anv_CreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Clear the subpasses along with the parent pass. This is required because + * each array member of anv_subpass must be a valid pointer if not NULL.
+ */ + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + + uint32_t subpass_attachment_count = 0, *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + + desc->colorAttachmentCount + + /* Count colorAttachmentCount again for resolve_attachments */ + desc->colorAttachmentCount; + } + + pass->subpass_attachments = + anv_alloc2(&device->alloc, pAllocator, + subpass_attachment_count * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + anv_free2(&device->alloc, pAllocator, pass); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] + = desc->pInputAttachments[j].attachment; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] + = desc->pColorAttachments[j].attachment; + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = a; + if (a != VK_ATTACHMENT_UNUSED) + subpass->has_resolve = true; + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = + desc->pDepthStencilAttachment->attachment; + } else { + subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; + } + } + + *pRenderPass = anv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +void anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + + anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); + anv_free2(&device->alloc, pAllocator, pass); +} + +void anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; +} diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c new file mode 100644 index 00000000000..a7feefb540e --- /dev/null +++ 
b/src/intel/vulkan/anv_pipeline.c @@ -0,0 +1,1278 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "util/mesa-sha1.h" +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "nir/spirv/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + _mesa_sha1_compute(module->data, module->size, module->sha1); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_free2(&device->alloc, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. + */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) +{ + if (strcmp(entrypoint_name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + nir_function *entry_point; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly.
In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = nir_options; + nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(module->size % 4 == 0); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + const uint32_t *data = + spec_info->pData + spec_info->pMapEntries[i].offset; + assert((const void *)(data + 1) <= + spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + spec_entries[i].data = *data; + } + } + + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, nir_options); + nir = entry_point->shader; + assert(nir->stage == stage); + nir_validate_shader(nir); + + free(spec_entries); + + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); + + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); + + nir_lower_system_values(nir); + nir_validate_shader(nir); + } + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entry_point->impl); + + uint32_t indirect_mask = 0; + if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) + indirect_mask |= (1 << nir_var_shader_in); + if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) + indirect_mask |= 1 << nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); + + return nir; +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? 
pAllocator : &device->alloc); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_free2(&device->alloc, pAllocator, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. (Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + if (extra && extra->color_attachment_count >= 0) { + key->nr_color_regions = extra->color_attachment_count; + } else { + key->nr_color_regions = + render_pass->subpasses[info->subpass].color_count; + } + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. 
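+ * Concretely: when sampleShadingEnable is set we also force + * compute_pos_offset and compute_sample_id below, even if the shader + * never reads the corresponding system values; the backend's dead-code + * pass is then expected to strip the unused payload setup.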
+ */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage, + spec_info); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (pipeline->bindings[stage].image_count > 0) + prog_data->nr_params += pipeline->bindings[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + + if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ + prog_data->param = (const union gl_constant_value **) + malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + + /* We now set the param values to be offsets into the + * anv_push_constants structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const union gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + char surface_usage_mask[256], sampler_usage_mask[256]; + zero(surface_usage_mask); + zero(sampler_usage_mask); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + if (pipeline->layout) + anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the binding table (plus MAX_RTS for the + * fragment stage). 
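+ * + * A worked example using the #defines in anv_private.h: a fragment + * shader's first texture or UBO surface lands at binding table index + * MAX_RTS = 8, leaving slots 0..7 for render targets + * (render_target_start = 0), while a compute shader reserves only slot 0 + * for the work-groups surface, so its bias is 1; every other stage starts + * at 0.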
+ */ + unsigned bias; + switch (stage) { + case MESA_SHADER_FRAGMENT: + bias = MAX_RTS; + break; + case MESA_SHADER_COMPUTE: + bias = 1; + break; + default: + bias = 0; + break; + } + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + if (nir->stage != MESA_SHADER_VERTEX && + nir->stage != MESA_SHADER_TESS_CTRL && + nir->stage != MESA_SHADER_TESS_EVAL && + nir->stage != MESA_SHADER_FRAGMENT) { + nir = brw_nir_lower_io(nir, &pipeline->device->info, + compiler->scalar_stage[stage], false, NULL); + } + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. + */ + nir->num_uniforms = prog_data->nr_params * 4; + + return nir; +} + +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_vs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + prog_data->inputs_read = nir->info.inputs_read; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, 
sizeof(*prog_data)); + ralloc_free(mem_ctx); + } + + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = kernel; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = kernel; + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_gs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + + ralloc_free(mem_ctx); + } + + pipeline->gs_kernel = kernel; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_wm_prog_key(&pipeline->device->info, info, extra, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &prog_data->base); + if (nir == 
NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; + nir_foreach_variable_safe(var, &nir->outputs) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + unsigned rt = var->data.location - FRAG_RESULT_DATA0; + if (rt >= key.nr_color_regions) { + var->data.mode = nir_var_local; + exec_node_remove(&var->node); + exec_list_push_tail(&impl->locals, &var->node); + } + } + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + + ralloc_free(mem_ctx); + } + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = kernel; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_cs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.work_groups_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + prog_data->base.total_shared = nir->num_shared; + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + ralloc_free(mem_ctx); + } + + 
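+ /* Whether it was found in the cache or freshly uploaded, "kernel" is + * now a byte offset into the instruction block pool; the gen-specific + * emit code programs it as the compute kernel's start pointer. + */ +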
pipeline->cs_simd = kernel; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
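+ * As a purely hypothetical example: with the 8 KB chunks above, + * vs_chunks = 16 and vs_entry_size_bytes = 64 would yield + * 16 * 8192 / 64 = 2048 VS entries before the clamping below.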
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterizationState); + dynamic->depth_bias.bias = + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; + dynamic->depth_bias.slope = + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConstants, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. 
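+ * (For instance, a hypothetical depth-only pipeline whose subpass has no + * depth/stencil attachment keeps the stencil defaults from + * default_dynamic_state regardless of what the application passes in + * pDepthStencilState.)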
+ * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. + */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.compareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.compareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.writeMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.writeMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.reference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.reference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan spec (20 Oct 2015, git-aa308cb), + * Section 4.2 Graphics Pipeline. 
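+ * + * Note that these checks are debug-only: the function is called from + * inside an anv_validate block in anv_pipeline_init, and the asserts + * themselves compile away in release builds.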
+ */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterizationState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) +{ + VkResult result; + + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + if (alloc == NULL) + alloc = &device->alloc; + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); + if (result != VK_SUCCESS) + return result; + + pipeline->batch.alloc = alloc; + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_GEOMETRY_BIT: + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_FRAGMENT_BIT: + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + + uint64_t inputs_read; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + inputs_read = ~0ull; + } else { + inputs_read = pipeline->vs_prog_data.inputs_read; + } + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + pipeline->vb_used |= 1 << desc->binding; + } + + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->stride; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
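+ * For example (hypothetical), per-instance data in binding 1 split + * across four vec4 attributes sets instancing_enable[1] once here, and + * the per-gen vertex element setup then emits all four elements with + * instance stepping.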
*/ + switch (desc->inputRate) { + default: + case VK_VERTEX_INPUT_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + while (anv_block_pool_size(&device->scratch_block_pool) < + pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + return VK_SUCCESS; +} + +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + else + return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 8: + return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 9: + return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + NULL, pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + else + return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 8: + return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 9: + return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + 
result = anv_compute_pipeline_create(_device, pipelineCache, + &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c new file mode 100644 index 00000000000..c89bb2a2ee1 --- /dev/null +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -0,0 +1,405 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/mesa-sha1.h" +#include "util/debug.h" +#include "anv_private.h" + +/* Remaining work: + * + * - Compact binding table layout so it's tight and not dependent on + * descriptor set layout. + * + * - Review prog_data struct for size and cacheability: struct + * brw_stage_prog_data has binding_table which uses a lot of uint32_t for + * 8-bit quantities, etc.; param, pull_param, and image_params are + * pointers, and we just need the compaction map. Use bit fields for all + * bools, e.g. dual_src_blend. + */ + +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); + + cache->kernel_count = 0; + cache->total_size = 0; + cache->table_size = 1024; + const size_t byte_size = cache->table_size * sizeof(cache->table[0]); + cache->table = malloc(byte_size); + + /* We don't consider allocation failure fatal, we just start with a 0-sized + * cache. 
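+ * + * The table is an open-addressed hash with linear probing: table_size + * stays a power of two so the (start + i) & mask walk below wraps + * cheaply, each slot holds a byte offset into program_stream's block + * pool, and ~0 (the 0xff memset) marks an empty slot.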
*/ + if (cache->table == NULL) + cache->table_size = 0; + else + memset(cache->table, 0xff, byte_size); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); + free(cache->table); +} + +struct cache_entry { + unsigned char sha1[20]; + uint32_t prog_data_size; + uint32_t kernel_size; + char prog_data[0]; + + /* kernel follows prog_data at next 64 byte aligned address */ +}; + +void +anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + struct mesa_sha1 *ctx; + + ctx = _mesa_sha1_init(); + _mesa_sha1_update(ctx, key, key_size); + _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); + /* hash in shader stage, pipeline layout? */ + if (spec_info) { + _mesa_sha1_update(ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); + _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); + } + _mesa_sha1_final(ctx, hash); +} + +uint32_t +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) sha1); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + const uint32_t offset = cache->table[index]; + + if (offset == ~0) + return NO_KERNEL; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { + if (prog_data) + memcpy(prog_data, entry->prog_data, entry->prog_data_size); + + const uint32_t preamble_size = + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + return offset + preamble_size; + } + } + + return NO_KERNEL; +} + +static void +anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, + struct cache_entry *entry, uint32_t entry_offset) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) entry->sha1); + + /* We'll always be able to insert when we get here. */ + assert(cache->kernel_count < cache->table_size / 2); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + if (cache->table[index] == ~0) { + cache->table[index] = entry_offset; + break; + } + } + + /* We don't include the alignment padding bytes when we serialize, so + * don't include that in the total size. 
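+ * (total_size thus sizes the tightly packed stream that + * anv_GetPipelineCacheData writes: for each entry, sizeof(*entry) plus + * prog_data_size bytes immediately followed by kernel_size bytes of + * code.)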
*/ + cache->total_size += + sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + cache->kernel_count++; +} + +static VkResult +anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +{ + const uint32_t table_size = cache->table_size * 2; + const uint32_t old_table_size = cache->table_size; + const size_t byte_size = table_size * sizeof(cache->table[0]); + uint32_t *table; + uint32_t *old_table = cache->table; + + table = malloc(byte_size); + if (table == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + cache->table = table; + cache->table_size = table_size; + cache->kernel_count = 0; + cache->total_size = 0; + + memset(cache->table, 0xff, byte_size); + for (uint32_t i = 0; i < old_table_size; i++) { + const uint32_t offset = old_table[i]; + if (offset == ~0) + continue; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + anv_pipeline_cache_add_entry(cache, entry, offset); + } + + free(old_table); + + return VK_SUCCESS; +} + +uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, size_t kernel_size, + const void *prog_data, size_t prog_data_size) +{ + pthread_mutex_lock(&cache->mutex); + struct cache_entry *entry; + + /* Meta pipelines don't have SPIR-V, so we can't hash them. + * Consequently, they just don't get cached. + */ + const uint32_t preamble_size = sha1 ? + align_u32(sizeof(*entry) + prog_data_size, 64) : + 0; + + const uint32_t size = preamble_size + kernel_size; + + assert(size < cache->program_stream.block_pool->block_size); + const struct anv_state state = + anv_state_stream_alloc(&cache->program_stream, size, 64); + + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { + assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); + entry = state.map; + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); + entry->prog_data_size = prog_data_size; + memcpy(entry->prog_data, prog_data, prog_data_size); + entry->kernel_size = kernel_size; + + if (cache->kernel_count == cache->table_size / 2) + anv_pipeline_cache_grow(cache); + + /* Failing to grow the hash table isn't fatal, but it may mean we don't + * have enough space to add this new kernel. Only add it if there's room. + */ + if (cache->kernel_count < cache->table_size / 2) + anv_pipeline_cache_add_entry(cache, entry, state.offset); + } + + pthread_mutex_unlock(&cache->mutex); + + memcpy(state.map + preamble_size, kernel, kernel_size); + + if (!cache->device->info.has_llc) + anv_state_clflush(state); + + return state.offset + preamble_size; +} + +static void +anv_pipeline_cache_load(struct anv_pipeline_cache *cache, + const void *data, size_t size) +{ + struct anv_device *device = cache->device; + uint8_t uuid[VK_UUID_SIZE]; + struct { + uint32_t device_id; + uint8_t uuid[VK_UUID_SIZE]; + } header; + + if (size < sizeof(header)) + return; + memcpy(&header, data, sizeof(header)); + if (header.device_id != device->chipset_id) + return; + anv_device_get_cache_uuid(uuid); + if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) + return; + + const void *end = data + size; + const void *p = data + sizeof(header); + + while (p < end) { + /* The kernels aren't 64 byte aligned in the serialized format so + * they're always right after the prog_data. 
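+ * (In the pool itself, each kernel instead starts at the next 64-byte + * boundary after its cache_entry, which is why + * anv_pipeline_cache_upload_kernel recomputes the aligned preamble size + * when re-adding these entries.)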
+ */ + const struct cache_entry *entry = p; + const void *kernel = &entry->prog_data[entry->prog_data_size]; + + anv_pipeline_cache_upload_kernel(cache, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + p = kernel + entry->kernel_size; + } +} + +VkResult anv_CreatePipelineCache( + VkDevice _device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + if (pCreateInfo->initialDataSize > 0) + anv_pipeline_cache_load(cache, + pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); +} + +VkResult anv_GetPipelineCacheData( + VkDevice _device, + VkPipelineCache _cache, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + const size_t size = 4 + VK_UUID_SIZE + cache->total_size; + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) { + *pDataSize = 0; + return VK_INCOMPLETE; + } + + void *p = pData; + memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); + p += sizeof(device->chipset_id); + + anv_device_get_cache_uuid(p); + p += VK_UUID_SIZE; + + struct cache_entry *entry; + for (uint32_t i = 0; i < cache->table_size; i++) { + if (cache->table[i] == ~0) + continue; + + entry = cache->program_stream.block_pool->map + cache->table[i]; + + memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); + p += sizeof(*entry) + entry->prog_data_size; + + void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + memcpy(p, kernel, entry->kernel_size); + p += entry->kernel_size; + } + + return VK_SUCCESS; +} + +static void +anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, + struct anv_pipeline_cache *src) +{ + for (uint32_t i = 0; i < src->table_size; i++) { + if (src->table[i] == ~0) + continue; + + struct cache_entry *entry = + src->program_stream.block_pool->map + src->table[i]; + + if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) + continue; + + const void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + anv_pipeline_cache_upload_kernel(dst, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + } +} + +VkResult anv_MergePipelineCaches( + VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + + for (uint32_t i = 0; i < srcCacheCount; i++) { + ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + + anv_pipeline_cache_merge(dst, src); + } + + return VK_SUCCESS; +} diff --git 
a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h new file mode 100644 index 00000000000..ba86333525e --- /dev/null +++ b/src/intel/vulkan/anv_private.h @@ -0,0 +1,1876 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <pthread.h> +#include <assert.h> +#include <stdint.h> +#include <i915_drm.h> + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#else +#define VG(x) +#endif + +#include "brw_device_info.h" +#include "util/macros.h" +#include "util/list.h" + +/* Pre-declarations needed for WSI entrypoints */ +struct wl_surface; +struct wl_display; +typedef struct xcb_connection_t xcb_connection_t; +typedef uint32_t xcb_visualid_t; +typedef uint32_t xcb_window_t; + +#define VK_USE_PLATFORM_XCB_KHR +#define VK_USE_PLATFORM_WAYLAND_KHR + +#define VK_PROTOTYPES +#include <vulkan/vulkan.h> +#include <vulkan/vulkan_intel.h> +#include <vulkan/vk_icd.h> + +#include "anv_entrypoints.h" +#include "anv_gen_macros.h" +#include "brw_context.h" +#include "isl/isl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_DYNAMIC_BUFFERS 16 +#define MAX_IMAGES 8 +#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ + +#define anv_noreturn __attribute__((__noreturn__)) +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. 
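+ * The a == (a & -a) assertions above rely on two's complement: for a + * power of two, a & -a isolates the lowest set bit, which must equal a + * itself, and alignment then reduces to masking with (a - 1). For + * example, align_u32(13, 8) computes (13 + 7) & ~7 = 16.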
*/ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline float +anv_clamp_f(float f, float min, float max) +{ + assert(min < max); + + if (f > max) + return max; + else if (f < min) + return min; + else + return f; +} + +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +#define typed_memcpy(dest, src, count) ({ \ + static_assert(sizeof(*src) == sizeof(*dest), ""); \ + memcpy((dest), (src), (count) * sizeof(*(src))); \ +}) + +#define zero(x) (memset(&(x), 0, sizeof(x))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); + +#ifdef DEBUG +#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); +#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); +#else +#define vk_error(error) error +#define vk_errorf(error, format, ...) error +#endif + +void __anv_finishme(const char *file, int line, const char *format, ...) + anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + +/** + * If a block of code is annotated with anv_validate, then the block runs only + * in debug builds. + */ +#ifdef DEBUG +#define anv_validate if (1) +#else +#define anv_validate if (0) +#endif + +void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub() \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. 
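+ * + * A hypothetical snapshot: with size = 64 and element_size = 16, + * head = 80 and tail = 48 mean (80 - 48) / 16 = 2 elements are live, at + * byte offsets 48 & 63 = 48 and 64 & 63 = 0 in data; unsigned wraparound + * of the free-running indices keeps head - tail correct.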
+ */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void * +anv_vector_head(struct anv_vector *vector) +{ + assert(vector->tail < vector->head); + return (void *)((char *)vector->data + + ((vector->head - vector->element_size) & + (vector->size - 1))); +} + +static inline void * +anv_vector_tail(struct anv_vector *vector) +{ + return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + uint32_t gem_handle; + + /* Index into the current validation list. This is used by the + * validation list building algorithm to track which buffers are already + * in the validation list so that we can ensure uniqueness. + */ + uint32_t index; + + /* Last known offset. This value is provided by the kernel when we + * execbuf and is used as the presumed offset for the next bunch of + * relocations. + */ + uint64_t offset; + + uint64_t size; + void *map; + + /* We need to set the WRITE flag on winsys bos so GEM will know we're + * writing to them and synchronize uses on other rings (e.g. if the + * display server uses the blitter ring). + */ + bool is_winsys_bo; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + int32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + + /* The offset from the start of the bo to the "center" of the block + * pool. Pointers to allocated blocks are given by + * bo.map + center_bo_offset + offsets. + */ + uint32_t center_bo_offset; + + /* Current memory map of the block pool. This pointer may or may not + * point to the actual beginning of the block pool memory. If + * anv_block_pool_alloc_back has ever been called, then this pointer + * will point to the "center" position of the buffer and all offsets + * (negative or positive) given out by the block pool alloc functions + * will be valid relative to this pointer. + * + * In particular, map == bo.map + center_bo_offset + */ + void *map; + int fd; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. 
+ */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + union anv_free_list free_list; + struct anv_block_state state; + + union anv_free_list back_free_list; + struct anv_block_state back_state; +}; + +/* Block pools are backed by a fixed-size 4GB memfd */ +#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) + +/* The center of the block pool is also the middle of the memfd. This may + * change in the future if we decide differently for some reason. + */ +#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) + +static inline uint32_t +anv_block_pool_size(struct anv_block_pool *pool) +{ + return pool->state.end + pool->back_state.end; +} + +struct anv_state { + int32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream_block; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + + /* The current working block */ + struct anv_state_stream_block *block; + + /* Offset at which the current block starts */ + uint32_t start; + /* Offset at which to allocate the next state */ + uint32_t next; + /* Offset at which the current block ends */ + uint32_t end; +}; + +#define CACHELINE_SIZE 64 +#define CACHELINE_MASK 63 + +static inline void +anv_clflush_range(void *start, size_t size) +{ + void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK); + void *end = start + size; + + __builtin_ia32_mfence(); + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } +} + +static inline void +anv_state_clflush(struct anv_state state) +{ + anv_clflush_range(state.map, state.alloc_size); +} + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +void anv_state_pool_finish(struct anv_state_pool *pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. 
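+ * A usage sketch (the 16 KB block size is picked purely for + * illustration): + * + * struct anv_bo_pool pool; + * struct anv_bo bo; + * anv_bo_pool_init(&pool, device, 16 * 1024); + * VkResult result = anv_bo_pool_alloc(&pool, &bo); + * ... use bo.map, submit bo ... + * anv_bo_pool_free(&pool, &bo); + * anv_bo_pool_finish(&pool);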
+/**
+ * Implements a pool of re-usable BOs.  The interface is identical to that
+ * of block_pool except that each block is its own BO.
+ */
+struct anv_bo_pool {
+   struct anv_device *device;
+
+   uint32_t bo_size;
+
+   void *free_list;
+};
+
+void anv_bo_pool_init(struct anv_bo_pool *pool,
+                      struct anv_device *device, uint32_t block_size);
+void anv_bo_pool_finish(struct anv_bo_pool *pool);
+VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo);
+void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);
+
+
+void *anv_resolve_entrypoint(uint32_t index);
+
+extern struct anv_dispatch_table dtable;
+
+#define ANV_CALL(func) ({ \
+   if (dtable.func == NULL) { \
+      size_t idx = offsetof(struct anv_dispatch_table, func) / sizeof(void *); \
+      dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \
+   } \
+   dtable.func; \
+})
+
+static inline void *
+anv_alloc(const VkAllocationCallbacks *alloc,
+          size_t size, size_t align,
+          VkSystemAllocationScope scope)
+{
+   return alloc->pfnAllocation(alloc->pUserData, size, align, scope);
+}
+
+static inline void *
+anv_realloc(const VkAllocationCallbacks *alloc,
+            void *ptr, size_t size, size_t align,
+            VkSystemAllocationScope scope)
+{
+   return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope);
+}
+
+static inline void
+anv_free(const VkAllocationCallbacks *alloc, void *data)
+{
+   alloc->pfnFree(alloc->pUserData, data);
+}
+
+static inline void *
+anv_alloc2(const VkAllocationCallbacks *parent_alloc,
+           const VkAllocationCallbacks *alloc,
+           size_t size, size_t align,
+           VkSystemAllocationScope scope)
+{
+   if (alloc)
+      return anv_alloc(alloc, size, align, scope);
+   else
+      return anv_alloc(parent_alloc, size, align, scope);
+}
+
+static inline void
+anv_free2(const VkAllocationCallbacks *parent_alloc,
+          const VkAllocationCallbacks *alloc,
+          void *data)
+{
+   if (alloc)
+      anv_free(alloc, data);
+   else
+      anv_free(parent_alloc, data);
+}
+
+struct anv_physical_device {
+    VK_LOADER_DATA                              _loader_data;
+
+    struct anv_instance *                       instance;
+    uint32_t                                    chipset_id;
+    const char *                                path;
+    const char *                                name;
+    const struct brw_device_info *              info;
+    uint64_t                                    aperture_size;
+    struct brw_compiler *                       compiler;
+    struct isl_device                           isl_dev;
+};
+
+struct anv_wsi_interface;
+
+#define VK_ICD_WSI_PLATFORM_MAX 5
+
+struct anv_instance {
+    VK_LOADER_DATA                              _loader_data;
+
+    VkAllocationCallbacks                       alloc;
+
+    uint32_t                                    apiVersion;
+    int                                         physicalDeviceCount;
+    struct anv_physical_device                  physicalDevice;
+
+    struct anv_wsi_interface *                  wsi[VK_ICD_WSI_PLATFORM_MAX];
+};
+
+VkResult anv_init_wsi(struct anv_instance *instance);
+void anv_finish_wsi(struct anv_instance *instance);
+
+struct anv_meta_state {
+   VkAllocationCallbacks alloc;
+
+   /**
+    * Use array element `i` for images with `2^i` samples.
+    */
+   struct {
+      /**
+       * Pipeline N is used to clear color attachment N of the current
+       * subpass.
+       *
+       * HACK: We use one pipeline per color attachment to work around the
+       * compiler's inability to dynamically set the render target index of
+       * the render target write message.
+       */
+      struct anv_pipeline *color_pipelines[MAX_RTS];
+
+      struct anv_pipeline *depth_only_pipeline;
+      struct anv_pipeline *stencil_only_pipeline;
+      struct anv_pipeline *depthstencil_pipeline;
+   } clear[1 + MAX_SAMPLES_LOG2];
+
+   struct {
+      VkRenderPass render_pass;
+
+      /** Pipeline that blits from a 1D image. */
+      VkPipeline pipeline_1d_src;
+
+      /** Pipeline that blits from a 2D image. */
+      VkPipeline pipeline_2d_src;
+
+      /** Pipeline that blits from a 3D image.
*/ + VkPipeline pipeline_3d_src; + + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + /** Pipeline [i] resolves an image with 2^(i+1) samples. */ + VkPipeline pipelines[MAX_SAMPLES_LOG2]; + + VkRenderPass pass; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } resolve; +}; + +struct anv_queue { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_state_pool * pool; +}; + +struct anv_pipeline_cache { + struct anv_device * device; + struct anv_state_stream program_stream; + pthread_mutex_t mutex; + + uint32_t total_size; + uint32_t table_size; + uint32_t kernel_count; + uint32_t *table; +}; + +void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device); +void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); +uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data); +uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, + size_t kernel_size, + const void *prog_data, + size_t prog_data_size); + +struct anv_device { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + struct isl_device isl_dev; + int context_id; + int fd; + + struct anv_bo_pool batch_bo_pool; + + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_pipeline_cache default_pipeline_cache; + + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_bo workaround_bo; + + struct anv_meta_state meta_state; + + struct anv_state border_colors; + + struct anv_queue queue; + + struct anv_block_pool scratch_block_pool; + + pthread_mutex_t mutex; +}; + +VkResult gen7_init_device_state(struct anv_device *device); +VkResult gen75_init_device_state(struct anv_device *device); +VkResult gen8_init_device_state(struct anv_device *device); +VkResult gen9_init_device_state(struct anv_device *device); + +void anv_device_get_cache_uuid(void *uuid); + + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); +int anv_gem_get_aperture(int fd, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); +int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t 
write_domain);
+
+VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
+
+struct anv_reloc_list {
+   size_t                                       num_relocs;
+   size_t                                       array_length;
+   struct drm_i915_gem_relocation_entry *       relocs;
+   struct anv_bo **                             reloc_bos;
+};
+
+VkResult anv_reloc_list_init(struct anv_reloc_list *list,
+                             const VkAllocationCallbacks *alloc);
+void anv_reloc_list_finish(struct anv_reloc_list *list,
+                           const VkAllocationCallbacks *alloc);
+
+uint64_t anv_reloc_list_add(struct anv_reloc_list *list,
+                            const VkAllocationCallbacks *alloc,
+                            uint32_t offset, struct anv_bo *target_bo,
+                            uint32_t delta);
+
+struct anv_batch_bo {
+   /* Link in the anv_cmd_buffer.owned_batch_bos list */
+   struct list_head                             link;
+
+   struct anv_bo                                bo;
+
+   /* Bytes actually consumed in this batch BO */
+   size_t                                       length;
+
+   /* Last seen surface state block pool bo offset */
+   uint32_t                                     last_ss_pool_bo_offset;
+
+   struct anv_reloc_list                        relocs;
+};
+
+struct anv_batch {
+   const VkAllocationCallbacks *                alloc;
+
+   void *                                       start;
+   void *                                       end;
+   void *                                       next;
+
+   struct anv_reloc_list *                      relocs;
+
+   /* This callback is called (with the associated user data) in the event
+    * that the batch runs out of space.
+    */
+   VkResult (*extend_cb)(struct anv_batch *, void *);
+   void *                                       user_data;
+};
+
+void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
+void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
+uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
+                              void *location, struct anv_bo *bo, uint32_t offset);
+VkResult anv_device_submit_simple_batch(struct anv_device *device,
+                                        struct anv_batch *batch);
+
+struct anv_address {
+   struct anv_bo *bo;
+   uint32_t offset;
+};
+
+#define __gen_address_type struct anv_address
+#define __gen_user_data struct anv_batch
+
+static inline uint64_t
+__gen_combine_address(struct anv_batch *batch, void *location,
+                      const struct anv_address address, uint32_t delta)
+{
+   if (address.bo == NULL) {
+      return address.offset + delta;
+   } else {
+      assert(batch->start <= location && location < batch->end);
+
+      return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
+   }
+}
+
+/* Wrapper macros needed to work around preprocessor argument issues.  In
+ * particular, arguments don't get pre-evaluated if they are concatenated.
+ * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
+ * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
+ * We can work around this easily enough with these helpers.
+ */
+#define __anv_cmd_length(cmd) cmd ## _length
+#define __anv_cmd_length_bias(cmd) cmd ## _length_bias
+#define __anv_cmd_header(cmd) cmd ## _header
+#define __anv_cmd_pack(cmd) cmd ## _pack
+
+#define anv_batch_emit(batch, cmd, ...) do {                               \
+      void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));   \
+      struct cmd __template = {                                            \
+         __anv_cmd_header(cmd),                                            \
+         __VA_ARGS__                                                       \
+      };                                                                   \
+      __anv_cmd_pack(cmd)(batch, __dst, &__template);                      \
+      VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \
+   } while (0)
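To see the wrapper macros in action, here is a hedged sketch of emitting a single command. It assumes the generated gen8_pack.h definitions, i.e. that GEN8_MI_STORE_DATA_IMM exists with Address and DataDWord0 fields; the command choice and values are illustrative only:

```c
#include "anv_private.h"
#include "gen8_pack.h"

static void
emit_example(struct anv_batch *batch, struct anv_bo *bo)
{
   /* anv_batch_emit() expands GEN8_MI_STORE_DATA_IMM into its generated
    * _length/_header/_pack companions, reserves the DWords in the batch,
    * packs the template into them, and __gen_combine_address() records a
    * relocation for the BO-relative address. */
   anv_batch_emit(batch, GEN8_MI_STORE_DATA_IMM,
                  .Address = { bo, 0 },
                  .DataDWord0 = 0xdeadbeef);
}
```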
+#define anv_batch_emitn(batch, n, cmd, ...) ({             \
+      void *__dst = anv_batch_emit_dwords(batch, n);       \
+      struct cmd __template = {                            \
+         __anv_cmd_header(cmd),                            \
+         .DWordLength = n - __anv_cmd_length_bias(cmd),    \
+         __VA_ARGS__                                       \
+      };                                                   \
+      __anv_cmd_pack(cmd)(batch, __dst, &__template);      \
+      __dst;                                               \
+   })
+
+#define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
+   do {                                                                 \
+      uint32_t *dw;                                                     \
+                                                                        \
+      static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \
+      dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
+      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
+         dw[i] = (dwords0)[i] | (dwords1)[i];                           \
+      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));   \
+   } while (0)
+
+#define anv_state_pool_emit(pool, cmd, align, ...) ({                   \
+      const uint32_t __size = __anv_cmd_length(cmd) * 4;                \
+      struct anv_state __state =                                        \
+         anv_state_pool_alloc((pool), __size, align);                   \
+      struct cmd __template = {                                         \
+         __VA_ARGS__                                                    \
+      };                                                                \
+      __anv_cmd_pack(cmd)(NULL, __state.map, &__template);              \
+      VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \
+      if (!(pool)->block_pool->device->info.has_llc)                    \
+         anv_state_clflush(__state);                                    \
+      __state;                                                          \
+   })
+
+#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) {  \
+   .GraphicsDataTypeGFDT                        = 0,           \
+   .LLCCacheabilityControlLLCCC                 = 0,           \
+   .L3CacheabilityControlL3CC                   = 1,           \
+}
+
+#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) {  \
+   .LLCeLLCCacheabilityControlLLCCC             = 0,             \
+   .L3CacheabilityControlL3CC                   = 1,             \
+}
+
+#define GEN8_MOCS {                                     \
+      .MemoryTypeLLCeLLCCacheabilityControl = WB,       \
+      .TargetCache = L3DefertoPATforLLCeLLCselection,   \
+      .AgeforQUADLRU = 0                                \
+   }
+
+/* Skylake: MOCS is now an index into an array of 62 different caching
+ * configurations programmed by the kernel.
+ */
+
+#define GEN9_MOCS {                                     \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
+      .IndextoMOCSTables = 2                            \
+   }
+
+#define GEN9_MOCS_PTE {                                 \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
+      .IndextoMOCSTables = 1                            \
+   }
+
+struct anv_device_memory {
+   struct anv_bo                                bo;
+   uint32_t                                     type_index;
+   VkDeviceSize                                 map_size;
+   void *                                       map;
+};
+
+/**
+ * Header for Vertex URB Entry (VUE)
+ */
+struct anv_vue_header {
+   uint32_t Reserved;
+   uint32_t RTAIndex; /* RenderTargetArrayIndex */
+   uint32_t ViewportIndex;
+   float PointWidth;
+};
+
+struct anv_descriptor_set_binding_layout {
+   /* Number of array elements in this binding */
+   uint16_t array_size;
+
+   /* Index into the flattened descriptor set */
+   uint16_t descriptor_index;
+
+   /* Index into the dynamic state array for a dynamic buffer */
+   int16_t dynamic_offset_index;
+
+   /* Index into the descriptor set buffer views */
+   int16_t buffer_index;
+
+   struct {
+      /* Index into the binding table for the associated surface */
+      int16_t surface_index;
+
+      /* Index into the sampler table for the associated sampler */
+      int16_t sampler_index;
+
+      /* Index into the image table for the associated image */
+      int16_t image_index;
+   } stage[MESA_SHADER_STAGES];
+
+   /* Immutable samplers (or NULL if no immutable samplers) */
+   struct anv_sampler **immutable_samplers;
+};
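A hypothetical helper, not part of the driver, showing how the per-binding indices above are meant to be consumed: descriptors of all bindings are flattened into one array, while each shader stage gets its own binding-table numbering, with a negative index marking a binding that stage does not use.

```c
#include "anv_private.h"

static void
resolve_binding(const struct anv_descriptor_set_layout *layout,
                uint32_t binding, uint32_t elem, gl_shader_stage stage,
                uint16_t *desc_index, int16_t *surface_slot)
{
   assert(binding < layout->binding_count);

   const struct anv_descriptor_set_binding_layout *b =
      &layout->binding[binding];

   assert(elem < b->array_size);

   /* Flattened index into anv_descriptor_set::descriptors. */
   *desc_index = b->descriptor_index + elem;

   /* Per-stage surface slot; negative means unused by this stage. */
   *surface_slot = b->stage[stage].surface_index < 0 ?
                   -1 : b->stage[stage].surface_index + elem;
}
```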
+struct anv_descriptor_set_layout {
+   /* Number of bindings in this descriptor set */
+   uint16_t binding_count;
+
+   /* Total size of the descriptor set with room for all array entries */
+   uint16_t size;
+
+   /* Shader stages affected by this descriptor set */
+   uint16_t shader_stages;
+
+   /* Number of buffers in this descriptor set */
+   uint16_t buffer_count;
+
+   /* Number of dynamic offsets used by this descriptor set */
+   uint16_t dynamic_offset_count;
+
+   /* Bindings in this descriptor set */
+   struct anv_descriptor_set_binding_layout binding[0];
+};
+
+struct anv_descriptor {
+   VkDescriptorType type;
+
+   union {
+      struct {
+         struct anv_image_view *image_view;
+         struct anv_sampler *sampler;
+      };
+
+      struct anv_buffer_view *buffer_view;
+   };
+};
+
+struct anv_descriptor_set {
+   const struct anv_descriptor_set_layout *layout;
+   uint32_t buffer_count;
+   struct anv_buffer_view *buffer_views;
+   struct anv_descriptor descriptors[0];
+};
+
+VkResult
+anv_descriptor_set_create(struct anv_device *device,
+                          const struct anv_descriptor_set_layout *layout,
+                          struct anv_descriptor_set **out_set);
+
+void
+anv_descriptor_set_destroy(struct anv_device *device,
+                           struct anv_descriptor_set *set);
+
+struct anv_pipeline_binding {
+   /* The descriptor set this surface corresponds to */
+   uint16_t set;
+
+   /* Offset into the descriptor set */
+   uint16_t offset;
+};
+
+struct anv_pipeline_layout {
+   struct {
+      struct anv_descriptor_set_layout *layout;
+      uint32_t dynamic_offset_start;
+   } set[MAX_SETS];
+
+   uint32_t num_sets;
+
+   struct {
+      bool has_dynamic_offsets;
+   } stage[MESA_SHADER_STAGES];
+};
+
+struct anv_buffer {
+   struct anv_device *                          device;
+   VkDeviceSize                                 size;
+
+   VkBufferUsageFlags                           usage;
+
+   /* Set when bound */
+   struct anv_bo *                              bo;
+   VkDeviceSize                                 offset;
+};
+
+enum anv_cmd_dirty_bits {
+   ANV_CMD_DIRTY_DYNAMIC_VIEWPORT             = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
+   ANV_CMD_DIRTY_DYNAMIC_SCISSOR              = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
+   ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH           = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
+   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS           = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
+   ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS      = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
+   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS         = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
+   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
+   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK   = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
+   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE    = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
+   ANV_CMD_DIRTY_DYNAMIC_ALL                  = (1 << 9) - 1,
+   ANV_CMD_DIRTY_PIPELINE                     = 1 << 9,
+   ANV_CMD_DIRTY_INDEX_BUFFER                 = 1 << 10,
+   ANV_CMD_DIRTY_RENDER_TARGETS               = 1 << 11,
+};
+typedef uint32_t anv_cmd_dirty_mask_t;
+
+struct anv_vertex_binding {
+   struct anv_buffer *                          buffer;
+   VkDeviceSize                                 offset;
+};
+
+struct anv_push_constants {
+   /* Current allocated size of this push constants data structure.
+    * Because a decent chunk of it may not be used (images on SKL, for
+    * instance), we won't actually allocate the entire structure up-front.
+    */
+   uint32_t size;
+
+   /* Push constant data provided by the client through vkPushConstants */
+   uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
+
+   /* Our hardware only provides zero-based vertex and instance id so, in
+    * order to satisfy the Vulkan requirements, we may have to push one or
+    * both of these into the shader.
+ */ + uint32_t base_vertex; + uint32_t base_instance; + + /* Offsets and ranges for dynamically bound buffers */ + struct { + uint32_t offset; + uint32_t range; + } dynamic[MAX_DYNAMIC_BUFFERS]; + + /* Image data for image_load_store on pre-SKL */ + struct brw_image_param images[MAX_IMAGES]; +}; + +struct anv_dynamic_state { + struct { + uint32_t count; + VkViewport viewports[MAX_VIEWPORTS]; + } viewport; + + struct { + uint32_t count; + VkRect2D scissors[MAX_SCISSORS]; + } scissor; + + float line_width; + + struct { + float bias; + float clamp; + float slope; + } depth_bias; + + float blend_constants[4]; + + struct { + float min; + float max; + } depth_bounds; + + struct { + uint32_t front; + uint32_t back; + } stencil_compare_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_write_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_reference; +}; + +extern const struct anv_dynamic_state default_dynamic_state; + +void anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); + +/** + * Attachment state when recording a renderpass instance. + * + * The clear value is valid only if there exists a pending clear. + */ +struct anv_attachment_state { + VkImageAspectFlags pending_clear_aspects; + VkClearValue clear_value; +}; + +/** State required while building cmd buffer */ +struct anv_cmd_state { + /* PIPELINE_SELECT.PipelineSelection */ + uint32_t current_pipeline; + uint32_t current_l3_config; + uint32_t vb_dirty; + anv_cmd_dirty_mask_t dirty; + anv_cmd_dirty_mask_t compute_dirty; + uint32_t num_workgroups_offset; + struct anv_bo *num_workgroups_bo; + VkShaderStageFlags descriptors_dirty; + VkShaderStageFlags push_constants_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + uint32_t restart_index; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set * descriptors[MAX_SETS]; + struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_state binding_tables[MESA_SHADER_STAGES]; + struct anv_state samplers[MESA_SHADER_STAGES]; + struct anv_dynamic_state dynamic; + bool need_query_wa; + + /** + * Array length is anv_cmd_state::pass::attachment_count. Array content is + * valid only when recording a render pass instance. + */ + struct anv_attachment_state * attachments; + + struct { + struct anv_buffer * index_buffer; + uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ + uint32_t index_offset; + } gen7; +}; + +struct anv_cmd_pool { + VkAllocationCallbacks alloc; + struct list_head cmd_buffers; +}; + +#define ANV_CMD_BUFFER_BATCH_SIZE 8192 + +enum anv_cmd_buffer_exec_mode { + ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, + ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, +}; + +struct anv_cmd_buffer { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_cmd_pool * pool; + struct list_head pool_link; + + struct anv_batch batch; + + /* Fields required for the actual chain of anv_batch_bo's. + * + * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 
+ */ + struct list_head batch_bos; + enum anv_cmd_buffer_exec_mode exec_mode; + + /* A vector of anv_batch_bo pointers for every batch or surface buffer + * referenced by this command buffer + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector seen_bbos; + + /* A vector of int32_t's for every block of binding tables. + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector bt_blocks; + uint32_t bt_next; + struct anv_reloc_list surface_relocs; + + /* Information needed for execbuf + * + * These fields are generated by anv_cmd_buffer_prepare_execbuf(). + */ + struct { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + bool need_reloc; + } execbuf2; + + /* Serial for tracking buffer completion */ + uint32_t serial; + + /* Stream objects for storing temporary data */ + struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; + + VkCommandBufferUsageFlags usage_flags; + VkCommandBufferLevel level; + + struct anv_cmd_state state; +}; + +VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary); +void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); + +VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state); +VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state); +uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages); + +struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment); +struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment); + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset); +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); + +void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void 
anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info); + +void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage); +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); + +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); + +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + +struct anv_event { + uint64_t semaphore; + struct anv_state state; +}; + +struct nir_shader; + +struct anv_shader_module { + struct nir_shader * nir; + + unsigned char sha1[20]; + uint32_t size; + char data[0]; +}; + +void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +static inline VkShaderStageFlagBits +mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) +{ + return (1 << mesa_stage); +} + +#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) + +#define anv_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, \ + __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ + stage = __builtin_ffs(__tmp) - 1, __tmp; \ + __tmp &= ~(1 << (stage))) + +struct anv_pipeline_bind_map { + uint32_t surface_count; + uint32_t sampler_count; + uint32_t image_count; + + struct anv_pipeline_binding * surface_to_descriptor; + struct anv_pipeline_binding * sampler_to_descriptor; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + uint32_t batch_data[512]; + struct anv_reloc_list batch_relocs; + uint32_t 
dynamic_state_mask; + struct anv_dynamic_state dynamic_state; + + struct anv_pipeline_layout * layout; + struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; + + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; + struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + uint32_t scratch_start[MESA_SHADER_STAGES]; + uint32_t total_scratch; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + VkShaderStageFlags active_stages; + struct anv_state blend_state; + uint32_t vs_simd8; + uint32_t vs_vec4; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t ps_ksp0; + uint32_t ps_ksp2; + uint32_t ps_grf_start0; + uint32_t ps_grf_start2; + uint32_t gs_kernel; + uint32_t cs_simd; + + uint32_t vb_used; + uint32_t binding_stride[MAX_VBS]; + bool instancing_enable[MAX_VBS]; + bool primitive_restart; + uint32_t topology; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; + + struct { + uint32_t sf[7]; + uint32_t depth_stencil_state[3]; + } gen7; + + struct { + uint32_t sf[4]; + uint32_t raster[5]; + uint32_t wm_depth_stencil[3]; + } gen8; + + struct { + uint32_t wm_depth_stencil[4]; + } gen9; +}; + +struct anv_graphics_pipeline_create_info { + /** + * If non-negative, overrides the color attachment count of the pipeline's + * subpass. + */ + int8_t color_attachment_count; + + bool use_repclear; + bool disable_viewport; + bool disable_scissor; + bool disable_vs; + bool use_rectlist; +}; + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc); + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +VkResult +anv_graphics_pipeline_create(VkDevice device, + VkPipelineCache cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen7_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen75_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult 
+gen7_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen75_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +struct anv_format_swizzle { + unsigned r:2; + unsigned g:2; + unsigned b:2; + unsigned a:2; +}; + +struct anv_format { + const VkFormat vk_format; + const char *name; + enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + const struct isl_format_layout *isl_layout; + struct anv_format_swizzle swizzle; + bool has_depth; + bool has_stencil; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); + +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle); + +static inline bool +anv_format_is_color(const struct anv_format *format) +{ + return !format->has_depth && !format->has_stencil; +} + +static inline bool +anv_format_is_depth_or_stencil(const struct anv_format *format) +{ + return format->has_depth || format->has_stencil; +} + +/** + * Subsurface of an anv_image. + */ +struct anv_surface { + struct isl_surf isl; + + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). + */ + uint32_t offset; +}; + +struct anv_image { + VkImageType type; + /* The original VkFormat provided by the client. This may not match any + * of the actual surface formats. + */ + VkFormat vk_format; + const struct anv_format *format; + VkExtent3D extent; + uint32_t levels; + uint32_t array_size; + uint32_t samples; /**< VkImageCreateInfo::samples */ + VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageTiling tiling; /** VkImageCreateInfo::tiling */ + + VkDeviceSize size; + uint32_t alignment; + + /* Set when bound */ + struct anv_bo *bo; + VkDeviceSize offset; + + /** + * Image subsurfaces + * + * For each foo, anv_image::foo_surface is valid if and only if + * anv_image::format has a foo aspect. + * + * The hardware requires that the depth buffer and stencil buffer be + * separate surfaces. From Vulkan's perspective, though, depth and stencil + * reside in the same VkImage. To satisfy both the hardware and Vulkan, we + * allocate the depth and stencil buffers as separate surfaces in the same + * bo. + */ + union { + struct anv_surface color_surface; + + struct { + struct anv_surface depth_surface; + struct anv_surface stencil_surface; + }; + }; +}; + +struct anv_image_view { + const struct anv_image *image; /**< VkImageViewCreateInfo::image */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. */ + + VkImageAspectFlags aspect_mask; + VkFormat vk_format; + VkComponentMapping swizzle; + enum isl_format format; + uint32_t base_layer; + uint32_t base_mip; + VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ + VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. 
*/ + + /** RENDER_SURFACE_STATE when using image as a color render target. */ + struct anv_state color_rt_surface_state; + + /** RENDER_SURFACE_STATE when using image as a sampler surface. */ + struct anv_state sampler_surface_state; + + /** RENDER_SURFACE_STATE when using image as a storage image. */ + struct anv_state storage_surface_state; +}; + +struct anv_image_create_info { + const VkImageCreateInfo *vk_info; + isl_tiling_flags_t isl_tiling_flags; + uint32_t stride; +}; + +VkResult anv_image_create(VkDevice _device, + const struct anv_image_create_info *info, + const VkAllocationCallbacks* alloc, + VkImage *pImage); + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, + VkImageAspectFlags aspect_mask); + +void anv_image_view_init(struct anv_image_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset); + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen7_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen75_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen8_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen9_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); + +struct anv_buffer_view { + enum isl_format format; /**< VkBufferViewCreateInfo::format */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. 
*/ + uint64_t range; /**< VkBufferViewCreateInfo::range */ + + struct anv_state surface_state; + struct anv_state storage_surface_state; +}; + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type); + +void anv_fill_buffer_surface_state(struct anv_device *device, + struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void gen7_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen75_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen8_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen9_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param); +void anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param); + +struct anv_sampler { + uint32_t state[4]; +}; + +struct anv_framebuffer { + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct anv_image_view * attachments[0]; +}; + +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; + + /** Subpass has at least one resolve attachment */ + bool has_resolve; +}; + +struct anv_render_pass_attachment { + const struct anv_format *format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; +}; + +struct anv_render_pass { + uint32_t attachment_count; + uint32_t subpass_count; + uint32_t * subpass_attachments; + struct anv_render_pass_attachment * attachments; + struct anv_subpass subpasses[0]; +}; + +extern struct anv_render_pass anv_meta_dummy_renderpass; + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); + +void *anv_lookup_entrypoint(const char *name); + +void anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename); + +#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *)(uintptr_t) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType)(uintptr_t) _obj; \ + } + +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = __anv_type ## _from_handle(__handle) + +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) 
+ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c new file mode 100644 index 00000000000..e45b519c0f3 --- /dev/null +++ b/src/intel/vulkan/anv_query.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+
+VkResult anv_CreateQueryPool(
+    VkDevice                                    _device,
+    const VkQueryPoolCreateInfo*                pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkQueryPool*                                pQueryPool)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_query_pool *pool;
+   VkResult result;
+   uint32_t slot_size;
+   uint64_t size;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
+
+   switch (pCreateInfo->queryType) {
+   case VK_QUERY_TYPE_OCCLUSION:
+   case VK_QUERY_TYPE_TIMESTAMP:
+      break;
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      return VK_ERROR_INCOMPATIBLE_DRIVER;
+   default:
+      assert(!"Invalid query type");
+   }
+
+   slot_size = sizeof(struct anv_query_pool_slot);
+   pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (pool == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pool->type = pCreateInfo->queryType;
+   pool->slots = pCreateInfo->queryCount;
+
+   size = pCreateInfo->queryCount * slot_size;
+   result = anv_bo_init_new(&pool->bo, device, size);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
+
+   *pQueryPool = anv_query_pool_to_handle(pool);
+
+   return VK_SUCCESS;
+
+ fail:
+   anv_free2(&device->alloc, pAllocator, pool);
+
+   return result;
+}
+
+void anv_DestroyQueryPool(
+    VkDevice                                    _device,
+    VkQueryPool                                 _pool,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
+
+   anv_gem_munmap(pool->bo.map, pool->bo.size);
+   anv_gem_close(device, pool->bo.gem_handle);
+   anv_free2(&device->alloc, pAllocator, pool);
+}
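Each query occupies one anv_query_pool_slot in the BO mapped above: the GPU writes a begin/end snapshot pair, and `available` is set once those writes have landed. As a hedged illustration of the arithmetic used by anv_GetQueryPoolResults() below (the helper itself is not in the patch), an occlusion result is the difference of the two snapshots, while a timestamp query only uses `begin`:

```c
#include "anv_private.h"

static uint64_t
occlusion_result(const struct anv_query_pool_slot *slot)
{
   /* Meaningful only once slot->available is nonzero. */
   return slot->end - slot->begin;
}
```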
+VkResult anv_GetQueryPoolResults(
+    VkDevice                                    _device,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    size_t                                      dataSize,
+    void*                                       pData,
+    VkDeviceSize                                stride,
+    VkQueryResultFlags                          flags)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   int64_t timeout = INT64_MAX;
+   uint64_t result;
+   int ret;
+
+   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
+          pool->type == VK_QUERY_TYPE_TIMESTAMP);
+
+   if (pData == NULL)
+      return VK_SUCCESS;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
+      if (ret == -1) {
+         /* We don't know the real error. */
+         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "gem_wait failed %m");
+      }
+   }
+
+   void *data_end = pData + dataSize;
+   struct anv_query_pool_slot *slot = pool->bo.map;
+
+   for (uint32_t i = 0; i < queryCount; i++) {
+      switch (pool->type) {
+      case VK_QUERY_TYPE_OCCLUSION: {
+         result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
+         break;
+      }
+      case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+         unreachable("pipeline stats not supported");
+      case VK_QUERY_TYPE_TIMESTAMP: {
+         result = slot[firstQuery + i].begin;
+         break;
+      }
+      default:
+         unreachable("invalid pool type");
+      }
+
+      if (flags & VK_QUERY_RESULT_64_BIT) {
+         uint64_t *dst = pData;
+         dst[0] = result;
+         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+            dst[1] = slot[firstQuery + i].available;
+      } else {
+         uint32_t *dst = pData;
+         if (result > UINT32_MAX)
+            result = UINT32_MAX;
+         dst[0] = result;
+         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+            dst[1] = slot[firstQuery + i].available;
+      }
+
+      pData += stride;
+      if (pData >= data_end)
+         break;
+   }
+
+   return VK_SUCCESS;
+}
+
+void anv_CmdResetQueryPool(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount)
+{
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+
+   for (uint32_t i = 0; i < queryCount; i++) {
+      switch (pool->type) {
+      case VK_QUERY_TYPE_OCCLUSION:
+      case VK_QUERY_TYPE_TIMESTAMP: {
+         struct anv_query_pool_slot *slot = pool->bo.map;
+         slot[firstQuery + i].available = 0;
+         break;
+      }
+      default:
+         assert(!"Invalid query type");
+      }
+   }
+}
diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c
new file mode 100644
index 00000000000..22fd01c9495
--- /dev/null
+++ b/src/intel/vulkan/anv_util.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "anv_private.h"
+
+/** Log an error message. */
+void anv_printflike(1, 2)
+anv_loge(const char *format, ...)
+{
+   va_list va;
+
+   va_start(va, format);
+   anv_loge_v(format, va);
+   va_end(va);
+}
+
+/** \see anv_loge() */
+void
+anv_loge_v(const char *format, va_list va)
+{
+   fprintf(stderr, "vk: error: ");
+   vfprintf(stderr, format, va);
+   fprintf(stderr, "\n");
+}
+
+void anv_printflike(3, 4)
+__anv_finishme(const char *file, int line, const char *format, ...)
+{ + va_list ap; + char buffer[256]; + + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); +} + +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + +VkResult +__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + +#define ERROR_CASE(error) case error: error_str = #error; break; + + const char *error_str; + switch ((int32_t)error) { + + /* Core errors */ + ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) + ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + ERROR_CASE(VK_ERROR_DEVICE_LOST) + ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) + ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) + ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + + /* Extension errors */ + ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) + + default: + assert(!"Unknown error"); + error_str = "unknown error"; + } + +#undef ERROR_CASE + + if (format) { + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); + } else { + fprintf(stderr, "%s:%d: %s\n", file, line, error_str); + } + + return error; +} + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(util_is_power_of_two(size)); + assert(element_size < size && util_is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = align_u32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c new file mode 100644 index 00000000000..c5911a3635b --- /dev/null +++ b/src/intel/vulkan/anv_wsi.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_wsi.h" + +VkResult +anv_init_wsi(struct anv_instance *instance) +{ + VkResult result; + + result = anv_x11_init_wsi(instance); + if (result != VK_SUCCESS) + return result; + +#ifdef HAVE_WAYLAND_PLATFORM + result = anv_wl_init_wsi(instance); + if (result != VK_SUCCESS) { + anv_x11_finish_wsi(instance); + return result; + } +#endif + + return VK_SUCCESS; +} + +void +anv_finish_wsi(struct anv_instance *instance) +{ +#ifdef HAVE_WAYLAND_PLATFORM + anv_wl_finish_wsi(instance); +#endif + anv_x11_finish_wsi(instance); +} + +void anv_DestroySurfaceKHR( + VkInstance _instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + + anv_free2(&instance->alloc, pAllocator, surface); +} + +VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR _surface, + VkBool32* pSupported) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_support(surface, device, queueFamilyIndex, pSupported); +} + +VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_capabilities(surface, device, pSurfaceCapabilities); +} + +VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_formats(surface, device, pSurfaceFormatCount, + pSurfaceFormats); +} + +VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return 
iface->get_present_modes(surface, device, pPresentModeCount, + pPresentModes); +} + +VkResult anv_CreateSwapchainKHR( + VkDevice _device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + struct anv_swapchain *swapchain; + + VkResult result = iface->create_swapchain(surface, device, pCreateInfo, + pAllocator, &swapchain); + if (result != VK_SUCCESS) + return result; + + *pSwapchain = anv_swapchain_to_handle(swapchain); + + return VK_SUCCESS; +} + +void anv_DestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + swapchain->destroy(swapchain, pAllocator); +} + +VkResult anv_GetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); +} + +VkResult anv_AcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->acquire_next_image(swapchain, timeout, semaphore, + pImageIndex); +} + +VkResult anv_QueuePresentKHR( + VkQueue _queue, + const VkPresentInfoKHR* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + VkResult result; + + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); + + assert(swapchain->device == queue->device); + + result = swapchain->queue_present(swapchain, queue, + pPresentInfo->pImageIndices[i]); + /* TODO: What if one of them returns OUT_OF_DATE? */ + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_wsi.h b/src/intel/vulkan/anv_wsi.h new file mode 100644 index 00000000000..6e9ff9b8447 --- /dev/null +++ b/src/intel/vulkan/anv_wsi.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "anv_private.h" + +struct anv_swapchain; + +struct anv_wsi_interface { + VkResult (*get_support)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported); + VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + VkResult (*get_formats)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats); + VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); + VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); +}; + +struct anv_swapchain { + struct anv_device *device; + + VkResult (*destroy)(struct anv_swapchain *swapchain, + const VkAllocationCallbacks *pAllocator); + VkResult (*get_images)(struct anv_swapchain *swapchain, + uint32_t *pCount, VkImage *pSwapchainImages); + VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, + uint64_t timeout, VkSemaphore semaphore, + uint32_t *image_index); + VkResult (*queue_present)(struct anv_swapchain *swap_chain, + struct anv_queue *queue, + uint32_t image_index); +}; + +ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) + +VkResult anv_x11_init_wsi(struct anv_instance *instance); +void anv_x11_finish_wsi(struct anv_instance *instance); +VkResult anv_wl_init_wsi(struct anv_instance *instance); +void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/intel/vulkan/anv_wsi_wayland.c b/src/intel/vulkan/anv_wsi_wayland.c new file mode 100644 index 00000000000..6f25eaf43ea --- /dev/null +++ b/src/intel/vulkan/anv_wsi_wayland.c @@ -0,0 +1,871 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <wayland-client.h> +#include <wayland-drm-client-protocol.h> + +#include "anv_wsi.h" + +#include <util/hash_table.h> + +#define MIN_NUM_IMAGES 2 + +struct wsi_wl_display { + struct wl_display * display; + struct wl_drm * drm; + + /* Vector of VkFormats supported */ + struct anv_vector formats; + + uint32_t capabilities; +}; + +struct wsi_wayland { + struct anv_wsi_interface base; + + struct anv_instance * instance; + + pthread_mutex_t mutex; + /* Hash table of wl_display -> wsi_wl_display mappings */ + struct hash_table * displays; +}; + +static void +wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) +{ + /* Don't add a format that's already in the list */ + VkFormat *f; + anv_vector_foreach(f, &display->formats) + if (*f == format) + return; + + /* Don't add formats which aren't supported by the driver */ + if (anv_format_for_vk_format(format)->isl_format == + ISL_FORMAT_UNSUPPORTED) { + return; + } + + f = anv_vector_add(&display->formats); + if (f) + *f = format; +} + +static void +drm_handle_device(void *data, struct wl_drm *drm, const char *name) +{ + fprintf(stderr, "wl_drm.device(%s)\n", name); +} + +static uint32_t +wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) +{ + switch (vk_format) { + /* TODO: Figure out what all the formats mean and make this table + * correct. + */ +#if 0 + case VK_FORMAT_R4G4B4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; + case VK_FORMAT_R5G6B5_UNORM: + return WL_DRM_FORMAT_BGR565; + case VK_FORMAT_R5G5B5A1_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; + case VK_FORMAT_R8G8B8_UNORM: + return WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R8G8B8A8_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R10G10B10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; + case VK_FORMAT_B4G4R4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; + case VK_FORMAT_B5G6R5_UNORM: + return WL_DRM_FORMAT_RGB565; + case VK_FORMAT_B5G5R5A1_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB1555 : WL_DRM_FORMAT_XRGB1555; +#endif + case VK_FORMAT_B8G8R8_SRGB: + return WL_DRM_FORMAT_BGRX8888; + case VK_FORMAT_B8G8R8A8_SRGB: + return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; +#if 0 + case VK_FORMAT_B10G10R10A2_UNORM: + return alpha ? 
WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; +#endif + + default: + assert(!"Unsupported Vulkan format"); + return 0; + } +} + +static void +drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) +{ + struct wsi_wl_display *display = data; + + switch (wl_format) { +#if 0 + case WL_DRM_FORMAT_ABGR4444: + case WL_DRM_FORMAT_XBGR4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); + break; + case WL_DRM_FORMAT_BGR565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); + break; + case WL_DRM_FORMAT_ABGR1555: + case WL_DRM_FORMAT_XBGR1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); + break; + case WL_DRM_FORMAT_XBGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); + /* fallthrough */ + case WL_DRM_FORMAT_ABGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); + break; + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); + break; + case WL_DRM_FORMAT_ARGB4444: + case WL_DRM_FORMAT_XRGB4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); + break; + case WL_DRM_FORMAT_RGB565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); + break; + case WL_DRM_FORMAT_ARGB1555: + case WL_DRM_FORMAT_XRGB1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); + break; +#endif + case WL_DRM_FORMAT_XRGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); + /* fallthrough */ + case WL_DRM_FORMAT_ARGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); + break; +#if 0 + case WL_DRM_FORMAT_ARGB2101010: + case WL_DRM_FORMAT_XRGB2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); + break; +#endif + } +} + +static void +drm_handle_authenticated(void *data, struct wl_drm *drm) +{ +} + +static void +drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) +{ + struct wsi_wl_display *display = data; + + display->capabilities = capabilities; +} + +static const struct wl_drm_listener drm_listener = { + drm_handle_device, + drm_handle_format, + drm_handle_authenticated, + drm_handle_capabilities, +}; + +static void +registry_handle_global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct wsi_wl_display *display = data; + + if (strcmp(interface, "wl_drm") == 0) { + assert(display->drm == NULL); + + assert(version >= 2); + display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); + + if (display->drm) + wl_drm_add_listener(display->drm, &drm_listener, display); + } +} + +static void +registry_handle_global_remove(void *data, struct wl_registry *registry, + uint32_t name) +{ /* No-op */ } + +static const struct wl_registry_listener registry_listener = { + registry_handle_global, + registry_handle_global_remove +}; + +static void +wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) +{ + anv_vector_finish(&display->formats); + if (display->drm) + wl_drm_destroy(display->drm); + anv_free(&wsi->instance->alloc, display); +} + +static struct wsi_wl_display * +wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) +{ + struct wsi_wl_display *display = + anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!display) + return NULL; + + memset(display, 0, sizeof(*display)); + + display->display = wl_display; + + struct wl_registry *registry = NULL; + + if 
(!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) + goto fail; + + registry = wl_display_get_registry(wl_display); + if (!registry) + goto fail; + + wl_registry_add_listener(registry, &registry_listener, display); + + /* Round-trip to get the wl_drm global */ + wl_display_roundtrip(wl_display); + + if (!display->drm) + goto fail; + + /* Round-trip to get wl_drm formats and capabilities */ + wl_display_roundtrip(wl_display); + + /* We need prime support */ + if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) + goto fail; + + /* We don't need this anymore */ + wl_registry_destroy(registry); + + return display; + +fail: + if (registry) + wl_registry_destroy(registry); + + wsi_wl_display_destroy(wsi, display); + return NULL; +} + +static struct wsi_wl_display * +wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, + wl_display); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. + */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->displays, wl_display); + if (entry) { + /* Oops, someone raced us to it */ + wsi_wl_display_destroy(wsi, display); + } else { + entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + return wsi_wl_get_display(physical_device->instance, display) != NULL; +} + +static VkResult +wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + *pSupported = true; + + return VK_SUCCESS; +} + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, + VK_PRESENT_MODE_FIFO_KHR, +}; + +static VkResult +wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* caps) +{ + caps->minImageCount = MIN_NUM_IMAGES; + caps->maxImageCount = 4; + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + + caps->supportedCompositeAlpha = + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_display *display = + wsi_wl_get_display(device->instance, 
surface->display); + + uint32_t count = anv_vector_length(&display->formats); + + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = count; + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= count); + *pSurfaceFormatCount = count; + + VkFormat *f; + anv_vector_foreach(f, &display->formats) { + *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { + .format = *f, + /* TODO: We should get this from the compositor somehow */ + .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + }; + } + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes)); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateWaylandSurfaceKHR( + VkInstance _instance, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceWayland *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; + surface->display = pCreateInfo->display; + surface->surface = pCreateInfo->surface; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct wsi_wl_image { + struct anv_image * image; + struct anv_device_memory * memory; + struct wl_buffer * buffer; + bool busy; +}; + +struct wsi_wl_swapchain { + struct anv_swapchain base; + + struct wsi_wl_display * display; + struct wl_event_queue * queue; + struct wl_surface * surface; + + VkExtent2D extent; + VkFormat vk_format; + uint32_t drm_format; + + VkPresentModeKHR present_mode; + bool fifo_ready; + + uint32_t image_count; + struct wsi_wl_image images[0]; +}; + +static VkResult +wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, + uint32_t *pCount, VkImage *pSwapchainImages) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + int ret = wl_display_dispatch_queue_pending(chain->display->display, + chain->queue); + /* XXX: I'm not sure if out-of-date is the right error here. If + * wl_display_dispatch_queue_pending fails it most likely means we got + * kicked by the server so this seems more-or-less correct. 
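+ * (A negative return from the dispatch functions generally means the + * connection to the compositor is dead, at which point the swapchain + * cannot make progress anyway.) 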
+ */ + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + while (1) { + for (uint32_t i = 0; i < chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + *image_index = i; + return VK_SUCCESS; + } + } + + /* This time we do a blocking dispatch because we can't go + * anywhere until we get an event. + */ + int ret = wl_display_roundtrip_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } +} + +static void +frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) +{ + struct wsi_wl_swapchain *chain = data; + + chain->fifo_ready = true; + + wl_callback_destroy(callback); +} + +static const struct wl_callback_listener frame_listener = { + frame_handle_done, +}; + +static VkResult +wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + while (!chain->fifo_ready) { + int ret = wl_display_dispatch_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + } + + assert(image_index < chain->image_count); + wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); + wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + struct wl_callback *frame = wl_surface_frame(chain->surface); + wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); + wl_callback_add_listener(frame, &frame_listener, chain); + chain->fifo_ready = false; + } + + chain->images[image_index].busy = true; + wl_surface_commit(chain->surface); + wl_display_flush(chain->display->display); + + return VK_SUCCESS; +} + +static void +wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), + pAllocator); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image), + pAllocator); +} + +static void +buffer_handle_release(void *data, struct wl_buffer *buffer) +{ + struct wsi_wl_image *image = data; + + assert(image->buffer == buffer); + + image->busy = false; +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static VkResult +wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + VkResult result; + + VkImage vk_image; + result = anv_image_create(vk_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = chain->vk_format, + .extent = { + .width = chain->extent.width, + .height = chain->extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, + &vk_image); + + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(vk_image); + assert(anv_format_is_color(image->image->format)); + + struct 
anv_surface *surface = &image->image->color_surface; + + VkDeviceMemory vk_memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + pAllocator, + &vk_memory); + + if (result != VK_SUCCESS) + goto fail_image; + + image->memory = anv_device_memory_from_handle(vk_memory); + image->memory->bo.is_winsys_bo = true; + + result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); + + if (result != VK_SUCCESS) + goto fail_mem; + + int ret = anv_gem_set_tiling(chain->base.device, + image->memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + int fd = anv_gem_handle_to_fd(chain->base.device, + image->memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + image->buffer = wl_drm_create_prime_buffer(chain->display->drm, + fd, /* name */ + chain->extent.width, + chain->extent.height, + chain->drm_format, + surface->offset, + surface->isl.row_pitch, + 0, 0, 0, 0 /* unused */); + wl_display_roundtrip(chain->display->display); + close(fd); + + wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); + wl_buffer_add_listener(image->buffer, &buffer_listener, image); + + return VK_SUCCESS; + +fail_mem: + anv_FreeMemory(vk_device, vk_memory, pAllocator); +fail_image: + anv_DestroyImage(vk_device, vk_image, pAllocator); + + return result; +} + +static VkResult +wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) { + if (chain->images[i].buffer) + wsi_wl_image_finish(chain, &chain->images[i], pAllocator); + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_swapchain *chain; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + int num_images = pCreateInfo->minImageCount; + + assert(num_images >= MIN_NUM_IMAGES); + + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) + num_images = MAX2(num_images, 4); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = wsi_wl_swapchain_destroy; + chain->base.get_images = wsi_wl_swapchain_get_images; + chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; + chain->base.queue_present = wsi_wl_swapchain_queue_present; + + chain->surface = surface->surface; + chain->extent = pCreateInfo->imageExtent; + chain->vk_format = 
pCreateInfo->imageFormat; + chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); + + chain->present_mode = pCreateInfo->presentMode; + chain->fifo_ready = true; + + chain->image_count = num_images; + + /* Mark a bunch of stuff as NULL. This way we can just call + * destroy_swapchain for cleanup. + */ + for (uint32_t i = 0; i < chain->image_count; i++) + chain->images[i].buffer = NULL; + chain->queue = NULL; + + chain->display = wsi_wl_get_display(device->instance, surface->display); + if (!chain->display) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + chain->queue = wl_display_create_queue(chain->display->display); + if (!chain->queue) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + for (uint32_t i = 0; i < chain->image_count; i++) { + result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); + if (result != VK_SUCCESS) + goto fail; + chain->images[i].busy = false; + } + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + +fail: + wsi_wl_swapchain_destroy(&chain->base, pAllocator); + + return result; +} + +VkResult +anv_wl_init_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + wsi->instance = instance; + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->displays) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = wsi_wl_surface_get_support; + wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; + wsi->base.get_formats = wsi_wl_surface_get_formats; + wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; + wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); + +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; + + return result; +} + +void +anv_wl_finish_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->displays, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c new file mode 100644 index 00000000000..843a6b62504 --- /dev/null +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -0,0 +1,758 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xcb/xcb.h> +#include <xcb/dri3.h> +#include <xcb/present.h> + +#include "anv_wsi.h" + +#include "util/hash_table.h" + +struct wsi_x11_connection { + bool has_dri3; + bool has_present; +}; + +struct wsi_x11 { + struct anv_wsi_interface base; + + pthread_mutex_t mutex; + /* Hash table of xcb_connection -> wsi_x11_connection mappings */ + struct hash_table *connections; +}; + +static struct wsi_x11_connection * +wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) +{ + xcb_query_extension_cookie_t dri3_cookie, pres_cookie; + xcb_query_extension_reply_t *dri3_reply, *pres_reply; + + struct wsi_x11_connection *wsi_conn = + anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi_conn) + return NULL; + + dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); + pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); + + dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); + pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); + if (dri3_reply == NULL || pres_reply == NULL) { + free(dri3_reply); + free(pres_reply); + anv_free(&instance->alloc, wsi_conn); + return NULL; + } + + wsi_conn->has_dri3 = dri3_reply->present != 0; + wsi_conn->has_present = pres_reply->present != 0; + + free(dri3_reply); + free(pres_reply); + + return wsi_conn; +} + +static void +wsi_x11_connection_destroy(struct anv_instance *instance, + struct wsi_x11_connection *conn) +{ + anv_free(&instance->alloc, conn); +} + +static struct wsi_x11_connection * +wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. 
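+ * (Each xcb_query_extension_reply below implies a round-trip to the X + * server; if another thread races us and creates the connection first, + * we simply destroy ours and use the winner's.) 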
+ */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_connection_create(instance, conn); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->connections, conn); + if (entry) { + /* Oops, someone raced us to it */ + wsi_x11_connection_destroy(instance, wsi_conn); + } else { + entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +static const VkSurfaceFormatKHR formats[] = { + { .format = VK_FORMAT_B8G8R8A8_SRGB, }, +}; + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, +}; + +static xcb_screen_t * +get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + if (screen_iter.data->root == root) + return screen_iter.data; + } + + return NULL; +} + +static xcb_visualtype_t * +screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_depth_iterator_t depth_iter = + xcb_screen_allowed_depths_iterator(screen); + + for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { + xcb_visualtype_iterator_t visual_iter = + xcb_depth_visuals_iterator (depth_iter.data); + + for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { + if (visual_iter.data->visual_id == visual_id) { + if (depth) + *depth = depth_iter.data->depth; + return visual_iter.data; + } + } + } + + return NULL; +} + +static xcb_visualtype_t * +connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + /* For this we have to iterate over all of the screens which is rather + * annoying. Fortunately, there is probably only 1. + */ + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, + visual_id, depth); + if (visual) + return visual; + } + + return NULL; +} + +static xcb_visualtype_t * +get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, + unsigned *depth) +{ + xcb_query_tree_cookie_t tree_cookie; + xcb_get_window_attributes_cookie_t attrib_cookie; + xcb_query_tree_reply_t *tree; + xcb_get_window_attributes_reply_t *attrib; + + tree_cookie = xcb_query_tree(conn, window); + attrib_cookie = xcb_get_window_attributes(conn, window); + + tree = xcb_query_tree_reply(conn, tree_cookie, NULL); + attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); + if (attrib == NULL || tree == NULL) { + free(attrib); + free(tree); + return NULL; + } + + xcb_window_t root = tree->root; + xcb_visualid_t visual_id = attrib->visual; + free(attrib); + free(tree); + + xcb_screen_t *screen = get_screen_for_root(conn, root); + if (screen == NULL) + return NULL; + + return screen_get_visualtype(screen, visual_id, depth); +} + +static bool +visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) +{ + uint32_t rgb_mask = visual->red_mask | + visual->green_mask | + visual->blue_mask; + + uint32_t all_mask = 0xffffffff >> (32 - depth); + + /* Do we have bits left over after RGB? 
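+ * For example, a depth-32 visual with 8-bit red, green and blue masks + * covers 0x00ffffff, which leaves 0xff000000 over for alpha. 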
*/ + return (all_mask & ~rgb_mask) != 0; +} + +VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, connection); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + return false; + } + + unsigned visual_depth; + if (!connection_get_visualtype(connection, visual_id, &visual_depth)) + return false; + + if (visual_depth != 24 && visual_depth != 32) + return false; + + return true; +} + +static VkResult +x11_surface_get_support(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, surface->connection); + if (!wsi_conn) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + *pSupported = false; + return VK_SUCCESS; + } + + unsigned visual_depth; + if (!get_visualtype_for_window(surface->connection, surface->window, + &visual_depth)) { + *pSupported = false; + return VK_SUCCESS; + } + + if (visual_depth != 24 && visual_depth != 32) { + *pSupported = false; + return VK_SUCCESS; + } + + *pSupported = true; + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR *caps) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + xcb_get_geometry_cookie_t geom_cookie; + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom; + unsigned visual_depth; + + geom_cookie = xcb_get_geometry(surface->connection, surface->window); + + /* This does a round-trip. This is why we do get_geometry first and + * wait to read the reply until after we have a visual. + */ + xcb_visualtype_t *visual = + get_visualtype_for_window(surface->connection, surface->window, + &visual_depth); + + geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); + if (geom) { + VkExtent2D extent = { geom->width, geom->height }; + caps->currentExtent = extent; + caps->minImageExtent = extent; + caps->maxImageExtent = extent; + } else { + /* This can happen if the client didn't wait for the configure event + * to come back from the compositor. In that case, we don't know the + * size of the window so we just return valid "I don't know" stuff. 
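+ * (A currentExtent of (-1, -1), i.e. the all-ones special value, tells + * the application that the surface size will be determined by the + * extent of the swapchain it creates.) 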
+ */ + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + } + free(err); + free(geom); + + if (visual_has_alpha(visual, visual_depth)) { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + } else { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } + + caps->minImageCount = 2; + caps->maxImageCount = 4; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_formats(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = ARRAY_SIZE(formats); + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + typed_memcpy(pSurfaceFormats, formats, ARRAY_SIZE(formats)); + *pSurfaceFormatCount = ARRAY_SIZE(formats); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes)); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateXcbSurfaceKHR( + VkInstance _instance, + const VkXcbSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceXcb *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; + surface->connection = pCreateInfo->connection; + surface->window = pCreateInfo->window; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct x11_image { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + xcb_get_geometry_cookie_t geom_cookie; + bool busy; +}; + +struct x11_swapchain { + struct anv_swapchain base; + + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t image_count; + uint32_t next_image; + struct x11_image images[0]; +}; + +static VkResult +x11_get_images(struct anv_swapchain *anv_chain, + uint32_t* pCount, VkImage *pSwapchainImages) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + 
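+ /* Like the other count/array queries in this file, this asserts rather + * than returning VK_INCOMPLETE when the caller's array is too small. + */ 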
assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +x11_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[chain->next_image]; + + if (image->busy) { + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom = + xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); + if (!geom) { + free(err); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + + if (geom->width != chain->extent.width || + geom->height != chain->extent.height) { + free(geom); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + free(geom); + + image->busy = false; + } + + *image_index = chain->next_image; + chain->next_image = (chain->next_image + 1) % chain->image_count; + return VK_SUCCESS; +} + +static VkResult +x11_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[image_index]; + + assert(image_index < chain->image_count); + + xcb_void_cookie_t cookie; + + cookie = xcb_copy_area(chain->conn, + image->pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + image->busy = true; + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} + +static VkResult +x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + xcb_void_cookie_t cookie; + + for (uint32_t i = 0; i < chain->image_count; i++) { + struct x11_image *image = &chain->images[i]; + + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + /* TODO: Delete images and free memory */ + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + struct x11_swapchain *chain; + xcb_void_cookie_t cookie; + VkResult result; + + int num_images = pCreateInfo->minImageCount; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = x11_swapchain_destroy; + chain->base.get_images = x11_get_images; + chain->base.acquire_next_image = x11_acquire_next_image; + chain->base.queue_present = x11_queue_present; + + chain->conn = surface->connection; + chain->window = surface->window; + chain->extent = pCreateInfo->imageExtent; + chain->image_count = num_images; + chain->next_image = 0; + + for (uint32_t i = 
0; i < chain->image_count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + anv_image_create(anv_device_to_handle(device), + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + NULL, + &image_h); + + image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->format)); + + surface = &image->color_surface; + + anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); + memory->bo.is_winsys_bo = true; + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->isl.row_pitch, + depth, bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + chain->images[i].busy = false; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult +anv_x11_init_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. 
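+ * pthread_mutex_init may also fail with EAGAIN or EPERM, neither of + * which maps cleanly onto a VkResult. 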
*/ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->connections) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = x11_surface_get_support; + wsi->base.get_capabilities = x11_surface_get_capabilities; + wsi->base.get_formats = x11_surface_get_formats; + wsi->base.get_present_modes = x11_surface_get_present_modes; + wsi->base.create_swapchain = x11_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; + + return result; +} + +void +anv_x11_finish_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->connections, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/intel/vulkan/dev_icd.json.in b/src/intel/vulkan/dev_icd.json.in new file mode 100644 index 00000000000..84920365289 --- /dev/null +++ b/src/intel/vulkan/dev_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@build_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c new file mode 100644 index 00000000000..23327ec0724 --- /dev/null +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -0,0 +1,589 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) +{ + static const uint32_t sampler_state_opcodes[] = { + [MESA_SHADER_VERTEX] = 43, + [MESA_SHADER_TESS_CTRL] = 44, /* HS */ + [MESA_SHADER_TESS_EVAL] = 45, /* DS */ + [MESA_SHADER_GEOMETRY] = 46, + [MESA_SHADER_FRAGMENT] = 47, + [MESA_SHADER_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [MESA_SHADER_VERTEX] = 38, + [MESA_SHADER_TESS_CTRL] = 39, + [MESA_SHADER_TESS_EVAL] = 40, + [MESA_SHADER_GEOMETRY] = 41, + [MESA_SHADER_FRAGMENT] = 42, + [MESA_SHADER_COMPUTE] = 0, + }; + + anv_foreach_stage(s, stages) { + if (cmd_buffer->state.samplers[s].alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[s], + .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); + } + + /* Always emit binding table pointers if we're asked to, since on SKL + * this is what flushes push constants. */ + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[s], + .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); + } +} + +GENX_FUNC(GEN7, GEN7) uint32_t +genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) +{ + VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; + + VkResult result = VK_SUCCESS; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + break; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. 
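+ * (Binding table offsets are computed relative to Surface State Base + * Address, so tables emitted against the old block would point at + * stale memory once the base address moves.) 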
+ */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* Re-emit all active binding tables */ + dirty |= cmd_buffer->state.pipeline->active_stages; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + return result; + } + } + + cmd_buffer->state.descriptors_dirty &= ~dirty; + + return dirty; +} + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +static void +emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkRect2D *scissors) +{ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN7_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN7_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. */ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, + &empty_scissor); + } else { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = scissor_state.offset); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(scissor_state); +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.scissor.count > 0) { + emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, + cmd_buffer->state.dynamic.scissor.scissors); + } else { + /* Emit a default scissor based on the currently bound framebuffer */ + emit_scissor_state(cmd_buffer, 1, + &(VkRect2D) { + .offset = { .x = 0, .y = 0, }, + .extent = { + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + }, + }); + } +} + +static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, +}; + +static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, +}; + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; + if (ANV_IS_HASWELL) + 
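+ /* Haswell has a programmable restart index (the cut index in + * 3DSTATE_VF); Ivybridge always cuts on the all-ones value for the + * index type, so there is nothing to track there. + */ 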
cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + cmd_buffer->state.gen7.index_buffer = buffer; + cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; + cmd_buffer->state.gen7.index_offset = offset; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. */ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .ConstantURBEntryReadLength = + push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + /* FIXME: figure out descriptors for gen7 */ + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +void 
+genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN7_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN7_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, + .InstanceDataStepRate = 1 + }; + + GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } + + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) { + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + } + + if (cmd_buffer->state.push_constants_dirty) + cmd_buffer_flush_push_constants(cmd_buffer); + + /* We use the gen8 state here because it only contains the additional + * min/max fields and, since they occur at the end of the packet and + * don't change the stride, they work on gen7 too. 
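+ * (Both layouts pack to the same 16-DWord stride; gen8 just fills DWords that are reserved on gen7, so gen7 hardware ignores them.)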
+ */ + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { + + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const uint32_t depth_format = image ? + isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, + &image->depth_surface.isl) : D16_UNORM; + + uint32_t sf_dw[GEN7_3DSTATE_SF_length]; + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .DepthBufferSurfaceFormat = depth_format, + .LineWidth = cmd_buffer->state.dynamic.line_width, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN7_COLOR_CALC_STATE_length * 4, + 64); + struct GEN7_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + + struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { + .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + + struct anv_state ds_state = + anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, + pipeline->gen7.depth_stencil_state, + 
GEN7_DEPTH_STENCIL_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = ds_state.offset); + } + + if (cmd_buffer->state.gen7.index_buffer && + cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; + uint32_t offset = cmd_buffer->state.gen7.index_offset; + + if (ANV_IS_HASWELL) { + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + .CutIndexEnable = pipeline->primitive_restart, + .IndexFormat = cmd_buffer->state.gen7.index_type, + .MemoryObjectControlState = GEN7_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + stub(); +} diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c new file mode 100644 index 00000000000..7c054fa56d5 --- /dev/null +++ b/src/intel/vulkan/gen7_pipeline.c @@ -0,0 +1,410 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +#include "genX_pipeline_util.h" + +static void +gen7_emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + + /* LegacyGlobalDepthBiasEnable */ + + .StatisticsEnable = true, + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ViewTransformEnable = !(extra && extra->disable_viewport), + .FrontWinding = vk_to_gen_front_face[info->frontFace], + /* bool AntiAliasingEnable; */ + + .CullMode = vk_to_gen_cullmode[info->cullMode], + + /* uint32_t LineEndCapAntialiasingRegionWidth; */ + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + + /* uint32_t MultisampleRasterizationMode; */ + /* bool LastPixelEnable; */ + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + + /* uint32_t AALineDistanceMode; */ + /* uint32_t VertexSubPixelPrecisionSelect; */ + .UsePointWidthState = !pipeline->writes_point_size, + .PointWidth = 1.0, + }; + + GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); +} + +static void +gen7_emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen7.depth_stencil_state, 0, + sizeof(pipeline->gen7.depth_stencil_state)); + return; + } + + struct GEN7_DEPTH_STENCIL_STATE state = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); +} + +static void +gen7_emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + if (info == NULL || info->attachmentCount == 0) { + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + .ColorBufferBlendEnable = false, + .WriteDisableAlpha = true, + .WriteDisableRed = true, + .WriteDisableGreen = true, + .WriteDisableBlue = true); + } else { + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this.
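+ * Only pAttachments[0] is honored below; any remaining color attachments silently inherit its blend settings.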
+ */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + +# if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +# endif + + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +# if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +# endif + ); + } + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset); +} + +GENX_FUNC(GEN7, GEN75) VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + + assert(pCreateInfo->pRasterizationState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); + + gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + emit_urb_setup(pipeline); + + const VkPipelineRasterizationStateCreateInfo *rs_info = + pCreateInfo->pRasterizationState; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + .CullMode = vk_to_gen_cullmode[rs_info->cullMode], + .ClipEnable = true, + .APIMode = APIMODE_OGL, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ClipMode = CLIPMODE_NORMAL, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = 
pCreateInfo->pViewportState->viewportCount - 1); + + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterizationSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + .PixelLocation = PIXLOC_CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xff); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* The last geometry producing stage will set urb_offset and urb_length, + * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ + uint32_t urb_offset = 1; + uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; + +#if 0 + /* From gen7_vs_state.c */ + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); +#endif + + if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = pipeline->vs_vec4, + .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterforURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = true, + .VSFunctionEnable = true); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + + if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); + } else { + urb_offset = 1; + urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .KernelStartPointer = pipeline->gs_kernel, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterforURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads - 1, + /* This in the next dword on HSW. 
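+ * (The per-gen pack function places the field in the right DWord, so the same initializer works whether this file is compiled as GEN7 or GEN75.)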
*/ + .ControlDataFormat = gs_prog_data->control_data_format, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .GSStatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, +# if (ANV_IS_HASWELL) + .ReorderMode = REORDER_TRAILING, +# else + .ReorderEnable = true, +# endif + .GSEnable = true); + } + + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_finishme("disabling ps"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = false, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT); + + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + + } else { + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + .VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, + + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE : POSOFFSET_NONE, + + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + +#if 0 + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ +#endif + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
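+ * Among other things, the multisample rasterization and dispatch modes are left at their defaults below.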
*/ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, + .PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c new file mode 100644 index 00000000000..77bdb75260c --- /dev/null +++ b/src/intel/vulkan/gen7_state.c @@ -0,0 +1,264 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +#include "genX_state_util.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +GENX_FUNC(GEN7, GEN75) void +genX(fill_buffer_surface_state)(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN_4, + .SurfaceHorizontalAlignment = HALIGN_4, + .TiledSurface = false, + .RenderCacheReadWriteMode = false, + .SurfaceObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, +# endif + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN7_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampEnable = CLAMP_ENABLE_OGL, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .BorderColorPointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, +
.VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t anv_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + uint32_t depth = 1; + if (range->layerCount > 1) { + depth = range->layerCount; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(&surface->isl); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, + usage == VK_IMAGE_USAGE_STORAGE_BIT), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], + .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], + + /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if + * Tiled Surface is False." + */ + .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, + .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? + TILEWALK_YMAJOR : TILEWALK_XMAJOR, + + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + + .RenderCacheReadWriteMode = 0, /* TEMPLATE */ + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->isl.row_pitch - 1, + .MinimumArrayElement = range->baseArrayLayer, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GENX(MOCS), + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .MCSEnable = false, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], +# else /* XXX: Seriously? 
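+ * Pre-Haswell surface state has no ShaderChannelSelect fields; these DWords hold clear-color values instead, so component swizzles cannot be expressed in surface state on IVB.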
*/ + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, +# endif + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c new file mode 100644 index 00000000000..b741612c891 --- /dev/null +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -0,0 +1,914 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +#if ANV_GEN == 8 +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields; the + * gen8 struct adds the min/max viewport fields. */ + struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = 1.0, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = 0.0, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = vp->y, + .YMaxViewPort = vp->y + vp->height - 1, + }; + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); + } + + if (!cmd_buffer->device->info.has_llc) { + anv_state_clflush(sf_clip_state); + anv_state_clflush(cc_state); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), + .SFClipViewportPointer = sf_clip_state.offset); +} + +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + });
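+ /* E.g. a 1920x1080 framebuffer gives m00 = 960, m11 = 540, m30 = 960, m31 = 540 above, with XMaxViewPort = 1919 and YMaxViewPort = 1079. */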
+ } +} +#endif + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN8_L3CNTLREG 0x7034 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t val = enable_slm ? + /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ + 0x60000021 : + /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ + 0x60000060; + bool changed = cmd_buffer->state.current_l3_config != val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); + cmd_buffer->state.current_l3_config = val; + } +} + +static void +__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} +static void +__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GEN9_3DSTATE_SF sf = { + GEN9_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} + +static void +__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->device->info.is_cherryview) + __emit_gen9_sf_state(cmd_buffer); + else + __emit_genx_sf_state(cmd_buffer); +} + +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + config_l3(cmd_buffer, false); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = 
GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. + */ + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { + __emit_sf_state(cmd_buffer); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); + anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, + pipeline->gen8.raster); + } + + /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to + * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split + * across different state packets for gen8 and gen9. We handle that by + * using a big old #if switch here. 
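+ * Concretely: gen8 keeps the stencil reference values in COLOR_CALC_STATE next to the blend constants, so the two dirty bits are flushed together; on gen9 the references moved into 3DSTATE_WM_DEPTH_STENCIL and are flushed with the compare/write masks instead.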
+ */ +#if ANV_GEN == 8 + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN8_COLOR_CALC_STATE_length * 4, + 64); + struct GEN8_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gen8.wm_depth_stencil); + } +#else + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN9_COLOR_CALC_STATE_length * 4, + 64); + struct GEN9_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + }; + GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN9_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN9_3DSTATE_WM_DEPTH_STENCIL_header, + + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + 
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front, + .BackfaceStencilReferenceValue = d->stencil_reference.back + }; + GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gen9.wm_depth_stencil); + } +#endif + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index, + ); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, + }; + + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. 
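+ * E.g. total_shared = 5 KiB: the loop below rounds slm_size up to 8 KiB, and 8192 / 4096 = 2 is what lands in SharedLocalMemorySize.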
*/ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .ConstantIndirectURBEntryReadLength = push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + + if (cmd_buffer->state.current_pipeline != GPGPU) { +#if ANV_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. + */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, + .Address = { bo, offset }); +} + +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + /* Workaround: When meta uses the pipeline with the VS disabled, it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. 
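+ * (For instance, a meta depth clear at the start of a render pass followed immediately by CmdBeginQuery can end up counting the clear's own samples.)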
Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. + */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, offset }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, offset + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = { &pool->bo, offset }); + break; + } + + /* Availability lives at slot offset + 16, matching CmdEndQuery above. */ + emit_query_availability(&cmd_buffer->batch, &pool->bo, offset + 16); +} + +#define alu_opcode(v) __gen_uint((v), 20, 31) +#define alu_operand1(v) __gen_uint((v), 10, 19) +#define alu_operand2(v) __gen_uint((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +static void +store_query_result(struct anv_batch *batch, uint32_t
reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } + + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } + + dst_offset += destStride; + } +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_SET); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_RESET); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags 
destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), + .WaitMode = PollingMode, + .CompareOperation = COMPARE_SAD_EQUAL_SDD, + .SemaphoreDataDword = VK_EVENT_SET, + .SemaphoreAddress = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }); + } + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c new file mode 100644 index 00000000000..f0411562fba --- /dev/null +++ b/src/intel/vulkan/gen8_pipeline.c @@ -0,0 +1,573 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +#include "genX_pipeline_util.h" + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), + .PrimitiveTopologyType = pipeline->topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t samples = 1; + + if (ms_info) + samples = ms_info->rasterizationSamples; + + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ?
Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); + + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + + /* For details on 3DSTATE_RASTER multisample state, see the BSpec table + * "Multisample Modes State". + */ + .DXMultisampleRasterizationEnable = samples > 1, + .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, + .ForceMultisampling = false, + + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), +#if ANV_GEN == 8 + .ViewportZClipTestEnable = true, +#else + /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ + .ViewportZFarClipTestEnable = true, + .ViewportZNearClipTestEnable = true, +#endif + }; + + GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + uint32_t num_dwords = GENX(BLEND_STATE_length); + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GENX(BLEND_STATE) blend_state = { + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || + a->dstColorBlendFactor != a->dstAlphaBlendFactor || + a->colorBlendOp != a->alphaBlendOp) { + blend_state.IndependentAlphaBlendEnable = true; + } + + blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. 
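+ * Otherwise the hardware would compute min/max of the factor-scaled + * values, while the spec requires min/max of the unscaled source and + * destination colors.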
+ */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + } + + for (uint32_t i = info->attachmentCount; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); + if (!device->info.has_llc) + anv_state_clflush(pipeline->blend_state); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + uint32_t *dw = ANV_GEN == 8 ? + pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; + + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen8.wm_depth_stencil, 0, + sizeof(pipeline->gen8.wm_depth_stencil)); + memset(pipeline->gen9.wm_depth_stencil, 0, + sizeof(pipeline->gen9.wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ + + struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); +} + +static void +emit_ms_state(struct anv_pipeline *pipeline, + const VkPipelineMultisampleStateCreateInfo *info) +{ + uint32_t samples = 1; + uint32_t log2_samples = 0; + + /* From the Vulkan 1.0 spec: + * If pSampleMask is NULL, it is treated as if the mask has all bits + * enabled, i.e. no coverage is removed from fragments. + * + * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. + */ + uint32_t sample_mask = 0xffff; + + if (info) { + samples = info->rasterizationSamples; + log2_samples = __builtin_ffs(samples) - 1; + } + + if (info && info->pSampleMask) + sample_mask &= info->pSampleMask[0]; + + if (info && info->sampleShadingEnable) + anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), + + /* The PRM says that this bit is valid only for DX9: + * + * SW can choose to set this bit only for DX9 API.
DX10/OGL API's + * should not have any effect by setting or not setting this bit. + */ + .PixelPositionOffsetEnable = false, + + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), + .SampleMask = sample_mask); +} + +VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterizationState); + emit_rs_state(pipeline, pCreateInfo->pRasterizationState, + pCreateInfo->pMultisampleState, extra); + emit_ms_state(pipeline, pCreateInfo->pMultisampleState); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + emit_urb_setup(pipeline); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->ps_ksp0 == NO_KERNEL ? 
+ 0 : pipeline->wm_prog_data.barycentric_interp_modes); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_kernel == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_kernel, + .VectorMaskEnable = false, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = gs_prog_data->vertices_in, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + .StaticOutput = gs_prog_data->static_vertex_count >= 0, + .StaticOutputVertexCount = + gs_prog_data->static_vertex_count >= 0 ? + gs_prog_data->static_vertex_count : 0, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : + pipeline->vs_vec4; + + if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .FunctionEnable = false, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. 
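+ * (Both the output read offset and length are in 256-bit URB units, + * i.e. pairs of varying slots, which is why num_slots is divided by + * two above.)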
*/ + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = vs_start, + .SingleVertexDispatch = false, + .VectorMaskEnable = false, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = false, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = false); + } else { + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. + */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader, or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to account for the URB entry output + * read offset in the VS and GS stages.
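+ * (That read offset of 1 is in 256-bit units, i.e. two varying slots: + * the VUE header and gl_Position. Slot N in the VUE map therefore + * shows up as source attribute N - 2 here.)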
+ */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } + } + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), + .AttributeSwizzleEnable = true, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = + DIV_ROUND_UP(max_source_attr + 1, 2), + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + wm_prog_data->num_varying_inputs, + +#if ANV_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + ); + + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
+ POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + bool per_sample_ps = pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, +#if ANV_GEN >= 9 + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? + ICMS_INNER_CONSERVATIVE : ICMS_NONE, +#else + .PixelShaderUsesInputCoverageMask = + wm_prog_data->uses_sample_mask, +#endif + ); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c new file mode 100644 index 00000000000..04cfff5444d --- /dev/null +++ b/src/intel/vulkan/gen8_state.c @@ -0,0 +1,493 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +#include "genX_state_util.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. + */ + anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if ANV_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + ._16xSample15YOffset = 0.0000, +#endif + ); + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +static const uint32_t +isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSS, +}; + +void +genX(fill_buffer_surface_state)(void *state,
enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + /* FIXME: We assume that the image must be bound at this time. */ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + +/** + * Get the values to pack into RENDER_SURFACE_STATE.SurfaceHorizontalAlignment + * and SurfaceVerticalAlignment. + */ +static void +get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) +{ + #if ANV_GENx10 >= 90 + if (isl_tiling_is_std_y(surf->tiling) || + surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* The hardware ignores the alignment values. Anyway, the surface's + * true alignment is likely outside the enum range of HALIGN* and + * VALIGN*. + */ + *halign = 0; + *valign = 0; + } else { + /* In Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in units + * of surface elements (not pixels nor samples). For compressed formats, + * a "surface element" is defined as a compression block. For example, + * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 + * format (ETC2 has a block height of 4), then the vertical alignment is + * 4 compression blocks or, equivalently, 16 pixels. + */ + struct isl_extent3d image_align_el + = isl_surf_get_image_alignment_el(surf); + + *halign = anv_halign[image_align_el.width]; + *valign = anv_valign[image_align_el.height]; + } + #else + /* Pre-Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in + * units of surface samples. For example, if SurfaceVerticalAlignment + * is VALIGN_4 and the surface is single-sampled, then for any surface + * format (compressed or not) the vertical alignment is + * 4 pixels. + */ + struct isl_extent3d image_align_sa + = isl_surf_get_image_alignment_sa(surf); + + *halign = anv_halign[image_align_sa.width]; + *valign = anv_valign[image_align_sa.height]; + #endif +} + +static uint32_t +get_qpitch(const struct isl_surf *surf) +{ + switch (surf->dim) { + default: + unreachable("bad isl_surf_dim"); + case ISL_SURF_DIM_1D: + #if ANV_GENx10 >= 90 + /* QPitch is usually expressed as rows of surface elements (where + * a surface element is a compression block or a single surface + * sample). Skylake 1D is an outlier. + * + * From the Skylake BSpec >> Memory Views >> Common Surface + * Formats >> Surface Layout and Tiling >> 1D Surfaces: + * + * Surface QPitch specifies the distance in pixels between array + * slices.
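+ * + * Hence we return the array pitch in elements, rather than in rows, + * for the gen9 1D case below.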
+ */ + return isl_surf_get_array_pitch_el(surf); + #else + return isl_surf_get_array_pitch_el_rows(surf); + #endif + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + #if ANV_GEN >= 9 + return isl_surf_get_array_pitch_el_rows(surf); + #else + /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch + * + * "This field must be set to an integer multiple of the Surface + * Vertical Alignment. For compressed textures (BC*, FXT1, + * ETC*, and EAC* Surface Formats), this field is in units of + * rows in the uncompressed surface, and must be set to an + * integer multiple of the vertical alignment parameter "j" + * defined in the Common Surface Formats section." + */ + return isl_surf_get_array_pitch_sa_rows(surf); + #endif + } +} + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + static const uint8_t isl_to_gen_tiling[] = { + [ISL_TILING_LINEAR] = LINEAR, + [ISL_TILING_X] = XMAJOR, + [ISL_TILING_Y0] = YMAJOR, + [ISL_TILING_Yf] = YMAJOR, + [ISL_TILING_Ys] = YMAJOR, + [ISL_TILING_W] = WMAJOR, + }; + + uint32_t halign, valign; + get_halign_valign(&surface->isl, &halign, &valign); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, is_storage), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = valign, + .SurfaceHorizontalAlignment = halign, + .TileMode = isl_to_gen_tiling[surface->isl.tiling], + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .CubeFaceEnablePositiveZ = 1, + .CubeFaceEnableNegativeZ = 1, + .CubeFaceEnablePositiveY = 1, + .CubeFaceEnableNegativeY = 1, + .CubeFaceEnablePositiveX = 1, + .CubeFaceEnableNegativeX = 1, + .MemoryObjectControlState = GENX(MOCS), + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
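+ * (For texturing, the view's base level is instead applied through + * SurfaceMinLOD and MIPCountLOD at the end of this function.)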
+ */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, + .Height = iview->level_0_extent.height - 1, + .Width = iview->level_0_extent.width - 1, + .Depth = 0, /* TEMPLATE */ + .SurfacePitch = surface->isl.row_pitch - 1, + .RenderTargetViewExtent = 0, /* TEMPLATE */ + .MinimumArrayElement = 0, /* TEMPLATE */ + .MultisampledSurfaceStorageFormat = + isl_to_gen_multisample_layout[surface->isl.msaa_layout], + .NumberofMultisamples = ffs(surface->isl.samples) - 1, + .MultisamplePositionPaletteIndex = 0, /* UNUSED */ + .XOffset = 0, + .YOffset = 0, + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + switch (template.SurfaceType) { + case SURFTYPE_1D: + case SURFTYPE_2D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + * + * In other words, 'Depth' is the number of array layers. + */ + template.Depth = range->layerCount - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_CUBE: + template.MinimumArrayElement = range->baseArrayLayer; + /* Same as SURFTYPE_2D, but divided by 6 */ + template.Depth = range->layerCount / 6 - 1; + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_3D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. + */ + template.Depth = image->extent.depth - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. + */ + template.RenderTargetViewExtent = iview->extent.depth - 1; + break; + default: + unreachable("bad SurfaceType"); + } + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD].
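+ * MIPCountLOD below is therefore the number of levels in the view + * minus one.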
+ */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + + struct GENX(SAMPLER_STATE) sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = CLAMP_MODE_OGL, +#if ANV_GEN == 8 + .BaseMipLevel = 0.0, +#endif + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .IndirectStatePointer = border_color_offset >> 6, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c new file mode 100644 index 00000000000..5498d1d68c6 --- /dev/null +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -0,0 +1,717 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "genxml/gen9_pack.h" +#elif (ANV_GEN == 8) +# include "genxml/gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "genxml/gen75_pack.h" +#elif (ANV_GEN == 7) +# include "genxml/gen7_pack.h" +#endif + +void +genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + +/* XXX: Do we need this on more than just BDW? */ +#if (ANV_GEN >= 8) + /* Emit a render target cache flush. + * + * This isn't documented anywhere in the PRM. However, it seems to be + * necessary prior to changing the surface state base address. Without + * this, we get GPU hangs when using multi-level command buffers which + * clear depth, reset state base address, and then go render stuff. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .RenderTargetCacheFlushEnable = true); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GENX(MOCS), + .GeneralStateBaseAddressModifyEnable = true, + + .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), + .SurfaceStateMemoryObjectControlState = GENX(MOCS), + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GENX(MOCS), + .DynamicStateBaseAddressModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GENX(MOCS), + .IndirectObjectBaseAddressModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GENX(MOCS), + .InstructionBaseAddressModifyEnable = true, + +# if (ANV_GEN >= 8) + /* Broadwell requires that we specify a buffer size for a bunch of + * these fields. However, since we will be growing the BOs live, we + * just set them all to the maximum. + */ + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true, +# endif + ); + + /* After re-setting the surface state base address, we have to do some + * cache flushing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software.
It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according to the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. Instead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, flushing the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true); +} + +void genX(CmdPipelineBarrier)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + uint32_t b, *dw; + + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. + */ + VkAccessFlags src_flags = 0; + VkAccessFlags dst_flags = 0; + + for (uint32_t i = 0; i < memoryBarrierCount; i++) { + src_flags |= pMemoryBarriers[i].srcAccessMask; + dst_flags |= pMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { + src_flags |= pBufferMemoryBarriers[i].srcAccessMask; + dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + src_flags |= pImageMemoryBarriers[i].srcAccessMask; + dst_flags |= pImageMemoryBarriers[i].dstAccessMask; + } + + /* Mask out the Source access flags we care about */ + const uint32_t src_mask = + VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT; + + src_flags = src_flags & src_mask; + + /* Mask out the destination access flags we care about */ + const uint32_t dst_mask = + VK_ACCESS_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_INDEX_READ_BIT | + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_UNIFORM_READ_BIT | + VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_TRANSFER_READ_BIT; + + dst_flags = dst_flags & dst_mask; + + /* The src flags represent how things were used previously. This is + * what we use for doing flushes.
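+ * The dst flags, handled further down, are turned into cache + * invalidations instead.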
+ */ + struct GENX(PIPE_CONTROL) flush_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + flush_cmd.DCFlushEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + flush_cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + flush_cmd.DepthCacheFlushEnable = true; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_cmd.RenderTargetCacheFlushEnable = true; + flush_cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("should've masked this out by now"); + } + } + + /* If we end up doing two PIPE_CONTROLs, the first, flushing one also has + * to stall and wait for the flushing to finish, so we don't re-dirty the + * caches with in-flight rendering after the second PIPE_CONTROL + * invalidates. + */ + + if (dst_flags) + flush_cmd.CommandStreamerStallEnable = true; + + if (src_flags && dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd); + } + + /* The dst flags represent how things will be used in the future. This + * is what we use for doing cache invalidations. + */ + struct GENX(PIPE_CONTROL) invalidate_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + invalidate_cmd.VFCacheInvalidationEnable = true; + break; + case VK_ACCESS_UNIFORM_READ_BIT: + invalidate_cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_ACCESS_SHADER_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_TRANSFER_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + default: + unreachable("should've masked this out by now"); + } + } + + if (dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd); + } +} + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .AddressModifyEnable = true, + .BufferPitch = 0, +#if (ANV_GEN >= 8) + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .BufferSize = 8 +#else + .VertexBufferMemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .EndAddress = { bo, offset + 8 }, +#endif + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[1] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + +void genX(CmdDraw)(
VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + 
.PrimitiveTopologyType = pipeline->topology); +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} + + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + 
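/* With IndirectParameterEnable set, the GPGPU_WALKER below takes its + * thread group counts from the GPGPU_DISPATCHDIM{X,Y,Z} registers we just + * loaded from the indirect buffer, rather than from ThreadGroupID*Dimension + * fields in the packet itself. + */ +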
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +void +genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } +} + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + const bool has_depth = iview && anv_format->has_depth; + const bool has_stencil = iview && anv_format->has_stencil; + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = true, + .StencilWriteEnable = has_stencil, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, + &image->depth_surface.isl), + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GENX(MOCS), +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, +#endif + .RenderTargetViewExtent = 1 - 1); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SurfaceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * the actual framebuffer's width and height, even when neither depth + * buffer nor stencil buffer is present. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } + + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), +#if ANV_GEN >= 8 || ANV_IS_HASWELL + .StencilBufferEnable = true, +#endif + .StencilBufferObjectControlState = GENX(MOCS), + + /* Stencil buffers have strange pitch.
The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, + +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, +#endif + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); + } + + /* Disable hierarchical depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); +} + +/** + * @see anv_cmd_buffer_set_subpass() + */ +void +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); +} diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c new file mode 100644 index 00000000000..4c2e0bc6e0d --- /dev/null +++ b/src/intel/vulkan/genX_pipeline.c @@ -0,0 +1,126 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "genxml/gen9_pack.h" +#elif (ANV_GEN == 8) +# include "genxml/gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "genxml/gen75_pack.h" +#elif (ANV_GEN == 7) +# include "genxml/gen7_pack.h" +#endif + +VkResult +genX(compute_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + pipeline->blend_state.map = NULL; + + result = anv_reloc_list_init(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); + anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); + + pipeline->use_repclear = false; + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), +#if ANV_GEN > 7 + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, +#else + .GPGPUMode = true, +#endif + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, + .ResetGatewayTimer = true, +#if ANV_GEN <= 8 + .BypassGatewayControl = true, +#endif + .URBEntryAllocationSize = ANV_GEN <= 7 ? 
0 : 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h new file mode 100644 index 00000000000..696e2be7c3f --- /dev/null +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -0,0 +1,327 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static uint32_t +vertex_element_comp_control(enum isl_format format, unsigned comp) +{ + uint8_t bits; + switch (comp) { + case 0: bits = isl_format_layouts[format].channels.r.bits; break; + case 1: bits = isl_format_layouts[format].channels.g.bits; break; + case 2: bits = isl_format_layouts[format].channels.b.bits; break; + case 3: bits = isl_format_layouts[format].channels.a.bits; break; + default: unreachable("Invalid component"); + } + + if (bits) { + return VFCOMP_STORE_SRC; + } else if (comp < 3) { + return VFCOMP_STORE_0; + } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || + isl_format_layouts[format].channels.r.type == ISL_SINT) { + assert(comp == 3); + return VFCOMP_STORE_1_INT; + } else { + assert(comp == 3); + return VFCOMP_STORE_1_FP; + } +} + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t elements; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. 
+ */ + elements = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + elements |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + elements = inputs_read >> VERT_ATTRIB_GENERIC0; + } + +#if ANV_GEN >= 8 + /* On BDW+, we only need to allocate space for base ids. Setting up + * the actual vertex and instance id is a separate packet. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#else + /* On Haswell and prior, vertex and instance id are created by using the + * ComponentControl fields, so we need an element for any of them. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid || + pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#endif + + uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; + if (elem_count == 0) + return; + + uint32_t *p; + + const uint32_t num_dwords = 1 + elem_count * 2; + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); + + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, + NULL); + + assert(desc->binding < 32); + + if ((elements & (1 << desc->location)) == 0) + continue; /* Attribute unused */ + + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offset, + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); + +#if ANV_GEN >= 8 + /* On Broadwell and later, we have a separate VF_INSTANCING packet + * that controls instancing. On Haswell and prior, that's part of + * VERTEX_BUFFER_STATE which we emit later. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), + .InstancingEnable = pipeline->instancing_enable[desc->binding], + .VertexElementIndex = slot, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); +#endif + } + + const uint32_t id_slot = __builtin_popcount(elements); + if (needs_svgs_elem) { + /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: + * "Within a VERTEX_ELEMENT_STATE structure, if a Component + * Control field is set to something other than VFCOMP_STORE_SRC, + * no higher-numbered Component Control fields may be set to + * VFCOMP_STORE_SRC" + * + * This means that if we have BaseInstance, we need BaseVertex as + * well. Just do all or nothing. + */ + uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance) ?
+ VFCOMP_STORE_SRC : VFCOMP_STORE_0; + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = 32, /* Reserved for this */ + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32_UINT, + .Component0Control = base_ctrl, + .Component1Control = base_ctrl, +#if ANV_GEN >= 8 + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#else + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID, +#endif + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); + } + +#if ANV_GEN >= 8 + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = id_slot, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = id_slot); +#endif +} + +static inline void +emit_urb_setup(struct anv_pipeline *pipeline) +{ +#if ANV_GEN == 7 + struct anv_device *device = pipeline->device; + + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." + */ + anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &device->workaround_bo, 0 }); +#endif + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); +} + +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 2048); +} + +static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, + [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, + [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 +}; + +static const uint32_t vk_to_gen_logic_op[] = { + 
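/* Indexed directly by VkLogicOp; each entry is the hardware LOGICOP_* + * encoding for the corresponding Vulkan logic op. + */ +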
[VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, +}; diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h new file mode 100644 index 00000000000..67f798ab66e --- /dev/null +++ b/src/intel/vulkan/genX_state_util.h @@ -0,0 +1,112 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return storage ? SURFTYPE_2D : SURFTYPE_CUBE; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + +static enum isl_format +anv_surface_format(const struct anv_device *device, enum isl_format format, + bool storage) +{ + if (storage) { + return isl_lower_storage_image_format(&device->isl_dev, format); + } else { + return format; + } +} + +#if ANV_GEN > 7 || ANV_IS_HASWELL +static const uint32_t vk_to_gen_swizzle[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA +}; +#endif + +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; diff --git a/src/intel/vulkan/intel_icd.json.in b/src/intel/vulkan/intel_icd.json.in new file mode 100644 index 00000000000..d9b363a9762 --- /dev/null +++ b/src/intel/vulkan/intel_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@install_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/tests/.gitignore b/src/intel/vulkan/tests/.gitignore new file mode 100644 index 00000000000..5d054055685 --- /dev/null +++ b/src/intel/vulkan/tests/.gitignore @@ -0,0 +1,5 @@ +block_pool +block_pool_no_free +state_pool +state_pool_free_list_only +state_pool_no_free diff --git a/src/intel/vulkan/tests/Makefile.am b/src/intel/vulkan/tests/Makefile.am new file mode 100644 index 00000000000..883013d86c6 --- /dev/null +++ b/src/intel/vulkan/tests/Makefile.am @@ -0,0 +1,46 @@ +# Copyright © 2009 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
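+ +# These are stand-alone unit tests for the driver's block and state pool +# allocators; automake builds each binary listed in check_PROGRAMS and runs +# it as part of 'make check'.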
+ +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ + -I$(top_srcdir)/src/vulkan + +LDADD = \ + $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(PTHREAD_LIBS) -lm -lstdc++ + +check_PROGRAMS = \ + block_pool_no_free \ + state_pool_no_free \ + state_pool_free_list_only \ + state_pool + +TESTS = $(check_PROGRAMS) diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c new file mode 100644 index 00000000000..86d1a76151f --- /dev/null +++ b/src/intel/vulkan/tests/block_pool_no_free.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define BLOCKS_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_block_pool *pool; + uint32_t blocks[BLOCKS_PER_THREAD]; + uint32_t back_blocks[BLOCKS_PER_THREAD]; +} jobs[NUM_THREADS]; + + +static void *alloc_blocks(void *_job) +{ + struct job *job = _job; + int32_t block, *data; + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = anv_block_pool_alloc(job->pool); + data = job->pool->map + block; + *data = block; + assert(block >= 0); + job->blocks[i] = block; + + block = anv_block_pool_alloc_back(job->pool); + data = job->pool->map + block; + *data = block; + assert(block < 0); + job->back_blocks[i] = -block; + } + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = job->blocks[i]; + data = job->pool->map + block; + assert(*data == block); + + block = -job->back_blocks[i]; + data = job->pool->map + block; + assert(*data == block); + } + + return NULL; +} + +static void validate_monotonic(uint32_t **blocks) +{ + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= BLOCKS_PER_THREAD) + continue; + + if (thread_max < blocks[i][next[i]]) { + thread_max = blocks[i][next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); + + highest = blocks[max_thread_idx][next[max_thread_idx]]; + next[max_thread_idx]++; + } +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* Validate that the block allocations were monotonic */ + uint32_t *block_ptrs[NUM_THREADS]; + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].blocks; + validate_monotonic(block_ptrs); + + /* Validate that the back block allocations were monotonic */ + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].back_blocks; + validate_monotonic(block_ptrs); + + anv_block_pool_finish(&pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/intel/vulkan/tests/state_pool.c b/src/intel/vulkan/tests/state_pool.c new file mode 100644 index 00000000000..878ec19a595 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool.c @@ -0,0 +1,57 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 10 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) +#define NUM_RUNS 64 + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + + for (unsigned i = 0; i < NUM_RUNS; i++) { + anv_block_pool_init(&block_pool, &device, 256); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + } + + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/intel/vulkan/tests/state_pool_free_list_only.c b/src/intel/vulkan/tests/state_pool_free_list_only.c new file mode 100644 index 00000000000..2f4eb47fe45 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_free_list_only.c @@ -0,0 +1,66 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 12 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 4096); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab and return enough states that the state pool test below won't + * actually ever resize anything. + */ + { + struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { + states[i] = anv_state_pool_alloc(&state_pool, 16, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) + anv_state_pool_free(&state_pool, states[i]); + } + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/intel/vulkan/tests/state_pool_no_free.c b/src/intel/vulkan/tests/state_pool_no_free.c new file mode 100644 index 00000000000..4b248c2ee66 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_no_free.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define STATES_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_state_pool *pool; + uint32_t offsets[STATES_PER_THREAD]; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *_job) +{ + struct job *job = _job; + + pthread_barrier_wait(&barrier); + + for (unsigned i = 0; i < STATES_PER_THREAD; i++) { + struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); + job->offsets[i] = state.offset; + } + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 64); + anv_state_pool_init(&state_pool, &block_pool); + + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= STATES_PER_THREAD) + continue; + + if (thread_max < jobs[i].offsets[next[i]]) { + thread_max = jobs[i].offsets[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * STATES_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/intel/vulkan/tests/state_pool_test_helper.h b/src/intel/vulkan/tests/state_pool_test_helper.h new file mode 100644 index 00000000000..0e56431303f --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_test_helper.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +struct job { + struct anv_state_pool *pool; + unsigned id; + pthread_t thread; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *void_job) +{ + struct job *job = void_job; + + const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); + const unsigned num_chunks = STATES_PER_THREAD / chunk_size; + + struct anv_state states[chunk_size]; + + pthread_barrier_wait(&barrier); + + for (unsigned c = 0; c < num_chunks; c++) { + for (unsigned i = 0; i < chunk_size; i++) { + states[i] = anv_state_pool_alloc(job->pool, 16, 16); + memset(states[i].map, 139, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < chunk_size; i++) + anv_state_pool_free(job->pool, states[i]); + } + + return NULL; +} + +static void run_state_pool_test(struct anv_state_pool *state_pool) +{ + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); +} -- cgit v1.2.3 From 8c23392c26916711b7b02337fd342ee9765b6fd4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:44:06 -0800 Subject: anv/formats: Don't use a compound literal to initialize a const array Doing so makes older versions of GCC rather grumpy. Newer GCC fixes this, but using a compound literal isn't really gaining us anything anyway. --- src/intel/vulkan/anv_formats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 7798a7bbde3..b4b52aa6053 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -24,8 +24,8 @@ #include "anv_private.h" #include "brw_surface_formats.h" -#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) -#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) +#define RGBA { 0, 1, 2, 3 } +#define BGRA { 2, 1, 0, 3 } #define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...)
\ [__vk_fmt] = { \ -- cgit v1.2.3 From e881c73975cb12ce58d4ebc362c6ad18a8e4b3ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 11:04:53 -0800 Subject: anv/pipeline: Don't leak the binding map --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 2 ++ src/intel/vulkan/anv_pipeline.c | 5 +++++ src/intel/vulkan/genX_pipeline.c | 1 + 3 files changed, 8 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index c58a93878ee..4600872d1f6 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -391,4 +391,6 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; } + + ralloc_free(mem_ctx); } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index a7feefb540e..2f1ce3956a9 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -193,6 +193,11 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + free(pipeline->bindings[s].surface_to_descriptor); + free(pipeline->bindings[s].sampler_to_descriptor); + } + anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 4c2e0bc6e0d..54ec8307d02 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -72,6 +72,7 @@ genX(compute_pipeline_create)( */ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; -- cgit v1.2.3 From e0565f40ea7f1653318a3e33cfeb46dcdbfd28ae Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 11:44:26 -0800 Subject: anv/pipeline: Use nir's num_images for allocating image_params --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 5 ++++- src/intel/vulkan/anv_pipeline.c | 5 ++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 4600872d1f6..4be630bcbe8 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -280,6 +280,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, struct anv_pipeline_bind_map map = { .surface_count = 0, .sampler_count = 0, + .image_count = 0, }; for (uint32_t set = 0; set < layout->num_sets; set++) { @@ -351,6 +352,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } if (map.image_count > 0) { + assert(map.image_count <= MAX_IMAGES); nir_foreach_variable(var, &shader->uniforms) { if (glsl_type_is_image(var->type) || (glsl_type_is_array(var->type) && @@ -369,7 +371,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } struct anv_push_constants *null_data = NULL; - const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const gl_constant_value **param = + prog_data->param + (shader->num_uniforms / 4); const struct brw_image_param *image_param = null_data->images; for (uint32_t i = 0; i < map.image_count; 
i++) { setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 2f1ce3956a9..27872d2769a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -341,9 +341,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; - if (pipeline->bindings[stage].image_count > 0) - prog_data->nr_params += pipeline->bindings[stage].image_count * - BRW_IMAGE_PARAM_SIZE; + if (nir->info.num_images > 0) + prog_data->nr_params += nir->info.num_images * BRW_IMAGE_PARAM_SIZE; if (prog_data->nr_params > 0) { /* XXX: I think we're leaking this */ -- cgit v1.2.3 From 1b37276467e47919256c0a171b92004d3cfaaab4 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Thu, 18 Feb 2016 12:30:27 -0800 Subject: vulkan: fix out-of-tree build We need to be able to find the generated gen*pack.h headers. Acked-by: Jason Ekstrand --- src/intel/vulkan/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 2144e5a691a..ccd98856b4b 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -65,6 +65,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/intel \ -I$(top_builddir)/src/vulkan libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From d5bb23156d698675fff74b1e8207ce0217c148db Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 13:37:01 -0800 Subject: anv/allocator: Set is_winsys_bo to false for block pool BOs --- src/intel/vulkan/anv_allocator.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index a7ae975656b..3b62bda3e93 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -253,6 +253,7 @@ anv_block_pool_init(struct anv_block_pool *pool, pool->bo.gem_handle = 0; pool->bo.offset = 0; pool->bo.size = 0; + pool->bo.is_winsys_bo = false; pool->block_size = block_size; pool->free_list = ANV_FREE_LIST_EMPTY; pool->back_free_list = ANV_FREE_LIST_EMPTY; -- cgit v1.2.3 From 698ea542830ba0d56e514492fbdf73e3898d4c17 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 13:54:15 -0800 Subject: anv/pipeline: Fix a typo in the pipeline layout code --- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 4be630bcbe8..e745bf661ee 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -194,7 +194,7 @@ lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) if (tex->sampler) { unsigned set = tex->sampler->var->data.descriptor_set; unsigned binding = tex->sampler->var->data.binding; - tex->sampler_index = state->set[set].surface_offsets[binding]; + tex->sampler_index = state->set[set].sampler_offsets[binding]; lower_tex_deref(tex, tex->sampler, &tex->sampler_index, nir_tex_src_sampler_offset, state); } -- cgit v1.2.3 From 371b4a5b33a13f35fa7783510d2d90685a9a2e8a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 
09:08:27 -0800 Subject: anv: Switch over to the macros in genxml --- src/intel/vulkan/Makefile.am | 8 +- src/intel/vulkan/anv_gen_macros.h | 146 ---------------------------------- src/intel/vulkan/anv_private.h | 1 - src/intel/vulkan/gen7_cmd_buffer.c | 80 ++++++++++--------- src/intel/vulkan/gen7_pipeline.c | 38 ++++----- src/intel/vulkan/gen7_state.c | 18 ++--- src/intel/vulkan/gen8_cmd_buffer.c | 32 ++++---- src/intel/vulkan/gen8_pipeline.c | 14 ++-- src/intel/vulkan/gen8_state.c | 4 +- src/intel/vulkan/genX_cmd_buffer.c | 25 +++--- src/intel/vulkan/genX_pipeline.c | 19 ++--- src/intel/vulkan/genX_pipeline_util.h | 10 +-- src/intel/vulkan/genX_state_util.h | 2 +- 13 files changed, 122 insertions(+), 275 deletions(-) delete mode 100644 src/intel/vulkan/anv_gen_macros.h (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index ccd98856b4b..6be4f9fb427 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -108,7 +108,7 @@ libanv_gen7_la_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c -libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 +libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=70 libanv_gen75_la_SOURCES = \ genX_cmd_buffer.c \ @@ -116,7 +116,7 @@ libanv_gen75_la_SOURCES = \ gen7_cmd_buffer.c \ gen7_pipeline.c \ gen7_state.c -libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 +libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=75 libanv_gen8_la_SOURCES = \ genX_cmd_buffer.c \ @@ -124,7 +124,7 @@ libanv_gen8_la_SOURCES = \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c -libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 +libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=80 libanv_gen9_la_SOURCES = \ genX_cmd_buffer.c \ @@ -132,7 +132,7 @@ libanv_gen9_la_SOURCES = \ gen8_cmd_buffer.c \ gen8_pipeline.c \ gen8_state.c -libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 +libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=90 if HAVE_EGL_PLATFORM_WAYLAND BUILT_SOURCES += \ diff --git a/src/intel/vulkan/anv_gen_macros.h b/src/intel/vulkan/anv_gen_macros.h deleted file mode 100644 index ef2ecd55a9b..00000000000 --- a/src/intel/vulkan/anv_gen_macros.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#pragma once - -/* Macros for handling per-gen compilation. - * - * The prefixing macros GENX() and genX() automatically prefix whatever you - * give them by GENX_ or genX_ where X is the gen number. - * - * You can declare a function to be used on some range of gens like this: - * - * GENX_FUNC(GEN7, GEN75) void - * genX(my_function_name)(args...) - * { - * // Do stuff - * } - * - * If the file is compiled for any set of gens containing gen7 and gen75, - * the function will effectively only get compiled twice as - * gen7_my_function_nmae and gen75_my_function_name. The function has to - * be compilable on all gens, but it will become a static inline that gets - * discarded by the compiler on all gens not in range. - * - * You can do pseudo-runtime checks in your function such as - * - * if (ANV_GEN > 8 || ANV_IS_HASWELL) { - * // Do something - * } - * - * The contents of the if statement must be valid regardless of gen, but - * the if will get compiled away on everything except haswell. - * - * For places where you really do have a compile-time conflict, you can - * use preprocessor logic: - * - * #if (ANV_GEN > 8 || ANV_IS_HASWELL) - * // Do something - * #endif - * - * However, it is strongly recommended that the former be used whenever - * possible. - */ - -/* Base macro defined on the command line. If we don't have this, we can't - * do anything. - */ -#ifdef ANV_GENx10 - -/* Gen checking macros */ -#define ANV_GEN ((ANV_GENx10) / 10) -#define ANV_IS_HASWELL ((ANV_GENx10) == 75) - -/* Prefixing macros */ -#if (ANV_GENx10 == 70) -# define GENX(X) GEN7_##X -# define genX(x) gen7_##x -#elif (ANV_GENx10 == 75) -# define GENX(X) GEN75_##X -# define genX(x) gen75_##x -#elif (ANV_GENx10 == 80) -# define GENX(X) GEN8_##X -# define genX(x) gen8_##x -#elif (ANV_GENx10 == 90) -# define GENX(X) GEN9_##X -# define genX(x) gen9_##x -#else -# error "Need to add prefixing macros for your gen" -#endif - -/* Macros for comparing gens */ -#if (ANV_GENx10 >= 70) -#define __ANV_GEN_GE_GEN7(T, F) T -#else -#define __ANV_GEN_GE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 <= 70) -#define __ANV_GEN_LE_GEN7(T, F) T -#else -#define __ANV_GEN_LE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 >= 75) -#define __ANV_GEN_GE_GEN75(T, F) T -#else -#define __ANV_GEN_GE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 <= 75) -#define __ANV_GEN_LE_GEN75(T, F) T -#else -#define __ANV_GEN_LE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 >= 80) -#define __ANV_GEN_GE_GEN8(T, F) T -#else -#define __ANV_GEN_GE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 <= 80) -#define __ANV_GEN_LE_GEN8(T, F) T -#else -#define __ANV_GEN_LE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 >= 90) -#define __ANV_GEN_GE_GEN9(T, F) T -#else -#define __ANV_GEN_GE_GEN9(T, F) F -#endif - -#if (ANV_GENx10 <= 90) -#define __ANV_GEN_LE_GEN9(T, F) T -#else -#define __ANV_GEN_LE_GEN9(T, F) F -#endif - -#define __ANV_GEN_IN_RANGE(start, end, T, F) \ - __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) - -/* Declares a function as static inlind if it's not in range */ -#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) - -#endif /* ANV_GENx10 */ diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ba86333525e..479f3826135 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -60,7 +60,6 @@ typedef uint32_t xcb_window_t; #include #include "anv_entrypoints.h" -#include "anv_gen_macros.h" #include "brw_context.h" #include "isl/isl.h" diff --git a/src/intel/vulkan/gen7_cmd_buffer.c 
b/src/intel/vulkan/gen7_cmd_buffer.c index 23327ec0724..e96400d5b6c 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) @@ -55,7 +55,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) if (state.offset == 0) continue; - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), ._3DCommandSubOpcode = push_constant_opcodes[stage], .ConstantBody = { .PointerToConstantBuffer0 = { .offset = state.offset }, @@ -95,7 +95,7 @@ genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, anv_foreach_stage(s, stages) { if (cmd_buffer->state.samplers[s].alloc_size > 0) { anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ._3DCommandSubOpcode = sampler_state_opcodes[s], .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); } @@ -103,7 +103,7 @@ genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, /* Always emit binding table pointers if we're asked to, since on SKL * this is what flushes push constants. */ anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ._3DCommandSubOpcode = binding_table_opcodes[s], .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); } @@ -168,6 +168,7 @@ clamp_int64(int64_t x, int64_t min, int64_t max) return max; } +#if GEN_GEN == 7 && !GEN_IS_HASWELL static void emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, uint32_t count, const VkRect2D *scissors) @@ -214,8 +215,8 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, anv_state_clflush(scissor_state); } -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) +void +gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.dynamic.scissor.count > 0) { emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, @@ -232,6 +233,7 @@ genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) }); } } +#endif static const uint32_t vk_to_gen_index_type[] = { [VK_INDEX_TYPE_UINT16] = INDEX_WORD, @@ -253,7 +255,7 @@ void genX(CmdBindIndexBuffer)( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; - if (ANV_IS_HASWELL) + if (GEN_IS_HASWELL) cmd_buffer->state.restart_index = restart_index_for_type[indexType]; cmd_buffer->state.gen7.index_buffer = buffer; cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; @@ -306,20 +308,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, .KernelStartPointer = pipeline->cs_simd, .BindingTablePointer = surfaces.offset, .SamplerStatePointer = samplers.offset, .ConstantURBEntryReadLength = push_constant_regs, +#if !GEN_IS_HASWELL .ConstantURBEntryReadOffset = 0, +#endif .BarrierEnable = cs_prog_data->uses_barrier, .SharedLocalMemorySize = slm_size, .NumberofThreadsinGPGPUThreadGroup = pipeline->cs_thread_width_max); - const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - 
anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), .InterfaceDescriptorTotalLength = size, .InterfaceDescriptorDataStartAddress = state.offset); @@ -335,7 +339,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), .PipelineSelection = GPGPU); cmd_buffer->state.current_pipeline = GPGPU; } @@ -371,16 +375,16 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) const uint32_t num_dwords = 1 + num_buffers * 4; p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN7_3DSTATE_VERTEX_BUFFERS); + GENX(3DSTATE_VERTEX_BUFFERS)); uint32_t vb, i = 0; for_each_bit(vb, vb_emit) { struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - struct GEN7_VERTEX_BUFFER_STATE state = { + struct GENX(VERTEX_BUFFER_STATE) state = { .VertexBufferIndex = vb, .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, - .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .VertexBufferMemoryObjectControlState = GENX(MOCS), .AddressModifyEnable = true, .BufferPitch = pipeline->binding_stride[vb], .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, @@ -388,7 +392,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .InstanceDataStepRate = 1 }; - GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); i++; } } @@ -416,7 +420,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) * PIPE_CONTROL needs to be sent before any combination of VS * associated 3DSTATE." 
*/ - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DepthStallEnable = true, .PostSyncOperation = WriteImmediateData, .Address = { &cmd_buffer->device->workaround_bo, 0 }); @@ -456,9 +460,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, &image->depth_surface.isl) : D16_UNORM; - uint32_t sf_dw[GEN7_3DSTATE_SF_length]; - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), .DepthBufferSurfaceFormat = depth_format, .LineWidth = cmd_buffer->state.dynamic.line_width, .GlobalDepthOffsetEnableSolid = enable_bias, @@ -468,7 +472,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp }; - GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); } @@ -477,9 +481,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN7_COLOR_CALC_STATE_length * 4, + GENX(COLOR_CALC_STATE_length) * 4, 64); - struct GEN7_COLOR_CALC_STATE cc = { + struct GENX(COLOR_CALC_STATE) cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], @@ -489,12 +493,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackFaceStencilReferenceValue = cmd_buffer->state.dynamic.stencil_reference.back, }; - GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) anv_state_clflush(cc_state); anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_CC_STATE_POINTERS, + GENX(3DSTATE_CC_STATE_POINTERS), .ColorCalcStatePointer = cc_state.offset); } @@ -502,12 +506,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_RENDER_TARGETS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { + struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), .StencilTestMask = @@ -520,15 +524,15 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackfaceStencilWriteMask = cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, }; - GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); struct anv_state ds_state = anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, pipeline->gen7.depth_stencil_state, - GEN7_DEPTH_STENCIL_STATE_length, 64); + GENX(DEPTH_STENCIL_STATE_length), 64); anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), .PointertoDEPTH_STENCIL_STATE = ds_state.offset); } @@ 
-538,16 +542,18 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; uint32_t offset = cmd_buffer->state.gen7.index_offset; - if (ANV_IS_HASWELL) { - anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, - .IndexedDrawCutIndexEnable = pipeline->primitive_restart, - .CutIndex = cmd_buffer->state.restart_index); - } +#if GEN_IS_HASWELL + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); +#endif - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), +#if !GEN_IS_HASWELL .CutIndexEnable = pipeline->primitive_restart, +#endif .IndexFormat = cmd_buffer->state.gen7.index_type, - .MemoryObjectControlState = GEN7_MOCS, + .MemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); } diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 7c054fa56d5..009a79ac815 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_pipeline_util.h" @@ -39,8 +39,8 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), /* LegacyGlobalDepthBiasEnable */ @@ -69,7 +69,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .PointWidth = 1.0, }; - GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); + GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); } static void @@ -85,7 +85,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, return; } - struct GEN7_DEPTH_STENCIL_STATE state = { + struct GENX(DEPTH_STENCIL_STATE) state = { .DepthTestEnable = info->depthTestEnable, .DepthBufferWriteEnable = info->depthWriteEnable, .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], @@ -103,7 +103,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], }; - GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); + GENX(DEPTH_STENCIL_STATE_pack)(NULL, &pipeline->gen7.depth_stencil_state, &state); } static void @@ -116,7 +116,7 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, if (info == NULL || info->attachmentCount == 0) { pipeline->blend_state = anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, + GENX(BLEND_STATE), 64, .ColorBufferBlendEnable = false, .WriteDisableAlpha = true, .WriteDisableRed = true, @@ -129,7 +129,7 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; pipeline->blend_state = anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, + GENX(BLEND_STATE), 64, .ColorBufferBlendEnable = a->blendEnable, .IndependentAlphaBlendEnable = true, /* FIXME: yes? 
*/ @@ -169,11 +169,11 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, ); } - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), .BlendStatePointer = pipeline->blend_state.offset); } -GENX_FUNC(GEN7, GEN75) VkResult +VkResult genX(graphics_pipeline_create)( VkDevice _device, struct anv_pipeline_cache * cache, @@ -216,7 +216,7 @@ genX(graphics_pipeline_create)( const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState; - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], .CullMode = vk_to_gen_cullmode[rs_info->cullMode], .ClipEnable = true, @@ -237,11 +237,11 @@ genX(graphics_pipeline_create)( uint32_t samples = 1; uint32_t log2_samples = __builtin_ffs(samples) - 1; - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), .PixelLocation = PIXLOC_CENTER, .NumberofMultisamples = log2_samples); - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = 0xff); const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; @@ -314,7 +314,7 @@ genX(graphics_pipeline_create)( .DispatchMode = gs_prog_data->base.dispatch_mode, .GSStatisticsEnable = true, .IncludePrimitiveID = gs_prog_data->include_primitive_id, -# if (ANV_IS_HASWELL) +# if (GEN_IS_HASWELL) .ReorderMode = REORDER_TRAILING, # else .ReorderEnable = true, @@ -326,10 +326,10 @@ genX(graphics_pipeline_create)( anv_finishme("disabling ps"); /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE)); /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .ThreadDispatchEnable = false, .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ @@ -349,7 +349,7 @@ genX(graphics_pipeline_create)( anv_finishme("primitive_id needs sbe swizzling setup"); /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, .VertexURBEntryReadLength = urb_length, .VertexURBEntryReadOffset = urb_offset, @@ -390,7 +390,7 @@ genX(graphics_pipeline_create)( .KernelStartPointer2 = pipeline->ps_ksp2); /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .ThreadDispatchEnable = true, .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index 77bdb75260c..5323c378d02 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_state_util.h" @@ -43,7 +43,7 @@ genX(init_device_state)(struct anv_device *device) batch.start = batch.next = cmds; batch.end = (void *) cmds + sizeof(cmds); - anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), .PipelineSelection = _3D); anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), @@ -52,7 +52,7 @@ genX(init_device_state)(struct anv_device *device) anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); assert(batch.next <= batch.end); @@ -60,7 +60,7 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -GENX_FUNC(GEN7, GEN75) void +void genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) @@ -79,7 +79,7 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, .Width = (num_elements - 1) & 0x7f, .Depth = ((num_elements - 1) >> 21) & 0x3f, .SurfacePitch = stride - 1, -# if (ANV_IS_HASWELL) +# if (GEN_IS_HASWELL) .ShaderChannelSelectRed = SCS_RED, .ShaderChannelSelectGreen = SCS_GREEN, .ShaderChannelSelectBlue = SCS_BLUE, @@ -107,7 +107,7 @@ VkResult genX(CreateSampler)( if (!sampler) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - struct GEN7_SAMPLER_STATE sampler_state = { + struct GENX(SAMPLER_STATE) sampler_state = { .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampEnable = CLAMP_ENABLE_OGL, @@ -145,7 +145,7 @@ VkResult genX(CreateSampler)( .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], }; - GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); *pSampler = anv_sampler_to_handle(sampler); @@ -227,7 +227,7 @@ genX(fill_image_surface_state)(struct anv_device *device, void *state_map, .SurfaceMinLOD = 0, /* TEMPLATE */ .MCSEnable = false, -# if (ANV_IS_HASWELL) +# if (GEN_IS_HASWELL) .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index b741612c891..3221f5e2dc4 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" static uint32_t cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) @@ -70,7 +70,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer) return flushed; } -#if ANV_GEN == 8 +#if GEN_GEN == 8 static void emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, uint32_t count, const VkViewport *viewports) @@ -213,6 +213,8 @@ __emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, cmd_buffer->state.pipeline->gen8.sf); } + +#include "genxml/gen9_pack.h" static void __emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) { @@ -339,14 +341,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) * across different state packets for gen8 and gen9. We handle that by * using a big old #if switch here. */ -#if ANV_GEN == 8 +#if GEN_GEN == 8 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN8_COLOR_CALC_STATE_length * 4, + GENX(COLOR_CALC_STATE_length) * 4, 64); - struct GEN8_COLOR_CALC_STATE cc = { + struct GENX(COLOR_CALC_STATE) cc = { .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], @@ -356,13 +358,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackFaceStencilReferenceValue = cmd_buffer->state.dynamic.stencil_reference.back, }; - GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) anv_state_clflush(cc_state); anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, + GENX(3DSTATE_CC_STATE_POINTERS), .ColorCalcStatePointer = cc_state.offset, .ColorCalcStatePointerValid = true); } @@ -370,10 +372,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { + GENX(3DSTATE_WM_DEPTH_STENCIL_header), /* Is this what we need to do? 
*/ .StencilBufferWriteEnable = @@ -389,8 +391,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackfaceStencilWriteMask = cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, - &wm_depth_stencil); + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, pipeline->gen8.wm_depth_stencil); @@ -568,7 +570,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { -#if ANV_GEN < 10 +#if GEN_GEN < 10 /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: * * Software must clear the COLOR_CALC_STATE Valid field in @@ -583,7 +585,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) #endif anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .MaskBits = 3, #endif .PipelineSelection = GPGPU); diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index f0411562fba..dc15e2066c5 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_pipeline_util.h" @@ -83,7 +83,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ScissorRectangleEnable = !(extra && extra->disable_scissor), -#if ANV_GEN == 8 +#if GEN_GEN == 8 .ViewportZClipTestEnable = true, #else /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ @@ -178,7 +178,7 @@ static void emit_ds_state(struct anv_pipeline *pipeline, const VkPipelineDepthStencilStateCreateInfo *info) { - uint32_t *dw = ANV_GEN == 8 ? + uint32_t *dw = GEN_GEN == 8 ? pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; if (info == NULL) { @@ -414,7 +414,7 @@ genX(graphics_pipeline_create)( const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + const int num_thread_bias = GEN_GEN == 8 ? 2 : 1; if (pipeline->ps_ksp0 == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), @@ -477,7 +477,7 @@ genX(graphics_pipeline_create)( .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, .Attribute1ActiveComponentFormat = ACF_XYZW, .Attribute2ActiveComponentFormat = ACF_XYZW, @@ -556,7 +556,7 @@ genX(graphics_pipeline_create)( .PixelShaderIsPerSample = per_sample_ps, .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .PixelShaderPullsBary = wm_prog_data->pulls_bary, .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? 
ICMS_INNER_CONSERVATIVE : ICMS_NONE, diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 04cfff5444d..fdde705f0d6 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -29,8 +29,8 @@ #include "anv_private.h" -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" #include "genX_state_util.h" diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 5498d1d68c6..9be87a3ff05 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -26,15 +26,8 @@ #include "anv_private.h" -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) @@ -48,7 +41,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) scratch_bo = &device->scratch_block_pool.bo; /* XXX: Do we need this on more than just BDW? */ -#if (ANV_GEN >= 8) +#if (GEN_GEN >= 8) /* Emit a render target cache flush. * * This isn't documented anywhere in the PRM. However, it seems to be @@ -81,7 +74,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) .InstructionMemoryObjectControlState = GENX(MOCS), .InstructionBaseAddressModifyEnable = true, -# if (ANV_GEN >= 8) +# if (GEN_GEN >= 8) /* Broadwell requires that we specify a buffer size for a bunch of * these fields. However, since we will be growing the BO's live, we * just set them all to the maximum. @@ -288,7 +281,7 @@ emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, .VertexBufferIndex = 32, /* Reserved for this */ .AddressModifyEnable = true, .BufferPitch = 0, -#if (ANV_GEN >= 8) +#if (GEN_GEN >= 8) .MemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = { bo, offset }, .BufferSize = 8 @@ -543,7 +536,7 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.current_pipeline != _3D) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .MaskBits = 3, #endif .PipelineSelection = _3D); @@ -587,7 +580,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .Depth = 1 - 1, .MinimumArrayElement = 0, .DepthBufferObjectControlState = GENX(MOCS), -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, #endif .RenderTargetViewExtent = 1 - 1); @@ -620,7 +613,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* Emit 3DSTATE_STENCIL_BUFFER */ if (has_stencil) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), -#if ANV_GEN >= 8 || ANV_IS_HASWELL +#if GEN_GEN >= 8 || GEN_IS_HASWELL .StencilBufferEnable = true, #endif .StencilBufferObjectControlState = GENX(MOCS), @@ -632,7 +625,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) */ .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, #endif .SurfaceBaseAddress = { diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 54ec8307d02..41a5d0f889c 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ 
b/src/intel/vulkan/genX_pipeline.c @@ -23,15 +23,8 @@ #include "anv_private.h" -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif +#include "genxml/gen_macros.h" +#include "genxml/genX_pack.h" VkResult genX(compute_pipeline_create)( @@ -94,19 +87,19 @@ genX(compute_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), -#if ANV_GEN > 7 +#if GEN_GEN > 7 .ScratchSpaceBasePointerHigh = 0, .StackSize = 0, #else .GPGPUMode = true, #endif .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, + .NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2, .ResetGatewayTimer = true, -#if ANV_GEN <= 8 +#if GEN_GEN <= 8 .BypassGatewayControl = true, #endif - .URBEntryAllocationSize = ANV_GEN <= 7 ? 0 : 2, + .URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2, .CURBEAllocationSize = 0); struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 696e2be7c3f..51fbd8bf273 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -68,7 +68,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, elements = inputs_read >> VERT_ATTRIB_GENERIC0; } -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 /* On BDW+, we only need to allocate space for base ids. Setting up * the actual vertex and instance id is a separate packet. */ @@ -123,7 +123,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, }; GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 /* On Broadwell and later, we have a separate VF_INSTANCING packet * that controls instancing. On Haswell and prior, that's part of * VERTEX_BUFFER_STATE which we emit later. @@ -158,7 +158,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, .SourceElementFormat = ISL_FORMAT_R32G32_UINT, .Component0Control = base_ctrl, .Component1Control = base_ctrl, -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 .Component2Control = VFCOMP_STORE_0, .Component3Control = VFCOMP_STORE_0, #else @@ -169,7 +169,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); } -#if ANV_GEN >= 8 +#if GEN_GEN >= 8 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, .VertexIDComponentNumber = 2, @@ -183,7 +183,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, static inline void emit_urb_setup(struct anv_pipeline *pipeline) { -#if ANV_GEN == 7 +#if GEN_GEN == 7 && !GEN_IS_HASWELL struct anv_device *device = pipeline->device; /* From the IVB PRM Vol. 
2, Part 1, Section 3.2.1: diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h index 67f798ab66e..10b3a9f42c5 100644 --- a/src/intel/vulkan/genX_state_util.h +++ b/src/intel/vulkan/genX_state_util.h @@ -57,7 +57,7 @@ anv_surface_format(const struct anv_device *device, enum isl_format format, } } -#if ANV_GEN > 7 || ANV_IS_HASWELL +#if GEN_GEN > 7 || GEN_IS_HASWELL static const uint32_t vk_to_gen_swizzle[] = { [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, -- cgit v1.2.3 From 1f1cf6fcb0ea7c27b573aab0396942875fa3dba6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 09:12:36 -0800 Subject: anv: Get rid of GENX_FUNC It was a bad idea. --- src/intel/genxml/gen_macros.h | 58 -------------------------------------- src/intel/vulkan/gen7_cmd_buffer.c | 12 ++++---- 2 files changed, 7 insertions(+), 63 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/genxml/gen_macros.h b/src/intel/genxml/gen_macros.h index 2c47979f35a..2658d032928 100644 --- a/src/intel/genxml/gen_macros.h +++ b/src/intel/genxml/gen_macros.h @@ -88,61 +88,3 @@ #else # error "Need to add prefixing macros for this gen" #endif - -/* Macros for comparing gens - * - * TODO: This wasn't the best idea. We really need to deprecate it. - */ -#if (GEN_VERSIONx10 >= 70) -#define __ANV_GEN_GE_GEN7(T, F) T -#else -#define __ANV_GEN_GE_GEN7(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 70) -#define __ANV_GEN_LE_GEN7(T, F) T -#else -#define __ANV_GEN_LE_GEN7(T, F) F -#endif - -#if (GEN_VERSIONx10 >= 75) -#define __ANV_GEN_GE_GEN75(T, F) T -#else -#define __ANV_GEN_GE_GEN75(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 75) -#define __ANV_GEN_LE_GEN75(T, F) T -#else -#define __ANV_GEN_LE_GEN75(T, F) F -#endif - -#if (GEN_VERSIONx10 >= 80) -#define __ANV_GEN_GE_GEN8(T, F) T -#else -#define __ANV_GEN_GE_GEN8(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 80) -#define __ANV_GEN_LE_GEN8(T, F) T -#else -#define __ANV_GEN_LE_GEN8(T, F) F -#endif - -#if (GEN_VERSIONx10 >= 90) -#define __ANV_GEN_GE_GEN9(T, F) T -#else -#define __ANV_GEN_GE_GEN9(T, F) F -#endif - -#if (GEN_VERSIONx10 <= 90) -#define __ANV_GEN_LE_GEN9(T, F) T -#else -#define __ANV_GEN_LE_GEN9(T, F) F -#endif - -#define __ANV_GEN_IN_RANGE(start, end, T, F) \ - __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) - -/* Declares a function as static inlind if it's not in range */ -#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index e96400d5b6c..7377487cf7e 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -70,9 +70,10 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) return flushed; } -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, - uint32_t stages) +#if GEN_GEN == 7 && !GEN_IS_HASWELL +void +gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) { static const uint32_t sampler_state_opcodes[] = { [MESA_SHADER_VERTEX] = 43, @@ -109,8 +110,8 @@ genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, } } -GENX_FUNC(GEN7, GEN7) uint32_t -genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) +uint32_t +gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & cmd_buffer->state.pipeline->active_stages; @@ -156,6 +157,7 @@ 
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) return dirty; } +#endif /* GEN_GEN == 7 && !GEN_IS_HASWELL */ static inline int64_t clamp_int64(int64_t x, int64_t min, int64_t max) -- cgit v1.2.3 From b5868d2343dab94be6a8a8e56632fbd0c42a1a2c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 19:29:05 -0800 Subject: anv: Zero out the WSI array when initializing the instance --- src/intel/vulkan/anv_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a8835f74179..7a5cb234ac5 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -254,6 +254,8 @@ VkResult anv_CreateInstance( instance->apiVersion = client_version; instance->physicalDeviceCount = -1; + memset(instance->wsi, 0, sizeof(instance->wsi)); + _mesa_locale_init(); VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); -- cgit v1.2.3 From f1dddeadc235cff20ceb7b8f7d3b70dc92cbe76b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 20:02:37 -0800 Subject: anv: Fix a typo in apply_dynamic_offsets shader->num_uniforms is in terms of bytes in i965. --- src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c index e71a8ffb1f4..46bc5d23a4e 100644 --- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c @@ -161,9 +161,9 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, struct anv_push_constants *null_data = NULL; for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { - prog_data->param[i * 2 + shader->num_uniforms] = + prog_data->param[i * 2 + shader->num_uniforms / 4] = (const union gl_constant_value *)&null_data->dynamic[i].offset; - prog_data->param[i * 2 + 1 + shader->num_uniforms] = + prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] = (const union gl_constant_value *)&null_data->dynamic[i].range; } -- cgit v1.2.3 From 7b2c63a53ca0ec685085cbf6b2e1f0da00752d91 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 18 Feb 2016 14:05:31 -0800 Subject: anv/meta_blit: Handle compressed textures in anv_CmdCopyImage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with anv_CmdCopyBufferToImage, compressed textures require special handling during copies. 
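The subtlety is that the blit path addresses compressed images in whole compression blocks ("elements") rather than texels, so both the destination offset and the copy extent must be converted from texels to elements before the blit is emitted. As a minimal sketch of the extent half of that conversion (the helper name below is hypothetical; the real code uses meta_region_extent_el() together with isl's per-format block dimensions):

   #include <vulkan/vulkan.h>   /* VkExtent3D */
   #include "isl/isl.h"         /* struct isl_format_layout */

   /* As in Mesa's util macros; repeated here so the sketch stands alone. */
   #define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

   /* Hypothetical helper, for illustration only: round a texel extent up
    * to whole compression blocks using the format's block width, height
    * and depth (isl_format_layout::bw/bh/bd).
    */
   static VkExtent3D
   texel_extent_to_elements(const struct isl_format_layout *fmtl,
                            VkExtent3D extent)
   {
      return (VkExtent3D) {
         .width  = DIV_ROUND_UP(extent.width,  fmtl->bw),
         .height = DIV_ROUND_UP(extent.height, fmtl->bh),
         .depth  = DIV_ROUND_UP(extent.depth,  fmtl->bd),
      };
   }

For uncompressed formats the block dimensions are all 1, so this degenerates to a plain copy; for BC1 and its 4x4 blocks, a 7x7 texel region rounds up to a 2x2 element blit.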
Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 62 ++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 07ebcbc06b1..06f13ecc8db 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -696,31 +696,34 @@ void anv_CmdCopyImage( }, cmd_buffer, 0); - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffset.x, - .y = pRegions[r].dstOffset.y, - .z = 0, - }; - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].srcSubresource.layerCount == 1 && - pRegions[r].dstSubresource.layerCount == 1); - num_slices = pRegions[r].extent.depth; - } else { - assert(pRegions[r].srcSubresource.layerCount == - pRegions[r].dstSubresource.layerCount); - assert(pRegions[r].extent.depth == 1); - num_slices = pRegions[r].dstSubresource.layerCount; - } - const uint32_t dest_base_array_slice = anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, &pRegions[r].dstOffset); - for (unsigned slice = 0; slice < num_slices; slice++) { + + unsigned num_slices_3d = pRegions[r].extent.depth; + unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice; + src_offset.z += slice_3d + slice_array; + + uint32_t img_x = 0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->isl_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer + slice_array, + pRegions[r].dstOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -733,20 +736,29 @@ void anv_CmdCopyImage( .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = pRegions[r].dstSubresource.mipLevel, .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + slice, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, img_o); + + const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].extent); meta_emit_blit(cmd_buffer, src_image, &src_iview, src_offset, - pRegions[r].extent, + img_extent_el, dest_image, &dest_iview, - dest_offset, - pRegions[r].extent, + dest_offset_el, + img_extent_el, VK_FILTER_NEAREST); + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; } } -- cgit v1.2.3 From 08b408311cb8fdbeae6d7ff5474107b0868c6ec9 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Mon, 22 Feb 2016 11:31:15 -0800 Subject: vulkan: fix out-of-tree builds --- src/intel/vulkan/Makefile.am | 1 + 1 file changed, 1 insertion(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 6be4f9fb427..53cfa20a263 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -66,6 +66,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ 
-I$(top_builddir)/src/intel \ + -I$(top_builddir)/src/intel/genxml \ -I$(top_builddir)/src/vulkan libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From 353d5bf286e1509af9ec2f1b8152d1f64790b52c Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 22 Feb 2016 10:19:43 -0800 Subject: anv/x11: Free swapchain images and memory on destroy --- src/intel/vulkan/anv_wsi_x11.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index 843a6b62504..a63cb6e7c5b 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -535,7 +535,11 @@ x11_swapchain_destroy(struct anv_swapchain *anv_chain, cookie = xcb_free_pixmap(chain->conn, image->pixmap); xcb_discard_reply(chain->conn, cookie.sequence); - /* TODO: Delete images and free memory */ + anv_DestroyImage(anv_device_to_handle(chain->base.device), + anv_image_to_handle(image->image), pAllocator); + + anv_FreeMemory(anv_device_to_handle(chain->base.device), + anv_device_memory_to_handle(image->memory), pAllocator); } anv_free2(&chain->base.device->alloc, pAllocator, chain); -- cgit v1.2.3 From 2570a58bcdf30d699b89323fef60692093dee7ea Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Feb 2016 22:46:28 -0800 Subject: anv: Implement descriptor pools Descriptor pools are an optimization that lets applications allocate descriptor sets through an externally synchronized object (that is, unlocked). In our case it's also plugging a memory leak, since we didn't track all allocated sets and failed to free them in vkResetDescriptorPool() and vkDestroyDescriptorPool(). --- src/intel/vulkan/anv_descriptor_set.c | 189 +++++++++++++++++++++++++++------- src/intel/vulkan/anv_meta.c | 25 +++++ src/intel/vulkan/anv_meta_blit.c | 6 +- src/intel/vulkan/anv_meta_resolve.c | 8 +- src/intel/vulkan/anv_private.h | 17 +++ 5 files changed, 200 insertions(+), 45 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 7a77336602a..718bc216f73 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -244,17 +244,67 @@ void anv_DestroyPipelineLayout( } /* - * Descriptor pools. These are a no-op for now. + * Descriptor pools. + * + * These are implemented using a big pool of memory and a free list for the + * host memory allocations, and a state_stream and a free list for the buffer + * view surface state. The spec allows us to fail to allocate due to + * fragmentation in all cases but two: 1) after pool reset, allocating up + * until the pool size with no freeing must succeed and 2) allocating and + * freeing only descriptor sets with the same layout. Case 1) is easy enough, + * and the free lists let us recycle blocks for case 2).
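+ * + * (Concretely: after a reset, sets are carved out of pool->data linearly by + * bumping pool->next; freeing a set pushes its block onto pool->free_list, + * where a later allocation needing the same or less space can reuse it.)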
*/ +#define EMPTY 1 + VkResult anv_CreateDescriptorPool( - VkDevice device, + VkDevice _device, const VkDescriptorPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorPool* pDescriptorPool) { - anv_finishme("VkDescriptorPool is a stub"); - *pDescriptorPool = (VkDescriptorPool)1; + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_pool *pool; + + uint32_t descriptor_count = 0; + uint32_t buffer_count = 0; + for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) { + switch (pCreateInfo->pPoolSizes[i].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount; + default: + descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount; + break; + } + } + + const size_t set_size = + sizeof(struct anv_descriptor_set) + + descriptor_count * sizeof(struct anv_descriptor) + + buffer_count * sizeof(struct anv_buffer_view); + + const size_t size = + sizeof(*pool) + + pCreateInfo->maxSets * set_size; + + pool = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!pool) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->size = size; + pool->next = 0; + pool->free_list = EMPTY; + + anv_state_stream_init(&pool->surface_state_stream, + &device->surface_state_block_pool); + pool->surface_state_free_list = NULL; + + *pDescriptorPool = anv_descriptor_pool_to_handle(pool); + return VK_SUCCESS; } @@ -263,37 +313,85 @@ void anv_DestroyDescriptorPool( VkDescriptorPool _pool, const VkAllocationCallbacks* pAllocator) { - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool); + + anv_state_stream_finish(&pool->surface_state_stream); + anv_free2(&device->alloc, pAllocator, pool); } VkResult anv_ResetDescriptorPool( - VkDevice device, + VkDevice _device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags) { - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); + + pool->next = 0; + pool->free_list = EMPTY; + anv_state_stream_finish(&pool->surface_state_stream); + anv_state_stream_init(&pool->surface_state_stream, + &device->surface_state_block_pool); + pool->surface_state_free_list = NULL; + return VK_SUCCESS; } +struct pool_free_list_entry { + uint32_t next; + uint32_t size; +}; + +static size_t +layout_size(const struct anv_descriptor_set_layout *layout) +{ + return + sizeof(struct anv_descriptor_set) + + layout->size * sizeof(struct anv_descriptor) + + layout->buffer_count * sizeof(struct anv_buffer_view); +} + +struct surface_state_free_list_entry { + void *next; + uint32_t offset; +}; + VkResult anv_descriptor_set_create(struct anv_device *device, + struct anv_descriptor_pool *pool, const struct anv_descriptor_set_layout *layout, struct anv_descriptor_set **out_set) { struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); + const size_t size = layout_size(layout); + + set = NULL; + if (size <= pool->size - pool->next) { + set = (struct anv_descriptor_set *) (pool->data + pool->next); + pool->next += size; + } else { + struct pool_free_list_entry *entry; + uint32_t *link = &pool->free_list; + for (uint32_t 
f = pool->free_list; f != EMPTY; f = entry->next) { + entry = (struct pool_free_list_entry *) (pool->data + f); + if (size <= entry->size) { + *link = entry->next; + set = (struct anv_descriptor_set *) entry; + break; + } + link = &entry->next; + } + } - set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set) + if (set == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - /* A descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. - */ - memset(set, 0, size); - + set->size = size; set->layout = layout; + set->buffer_views = + (struct anv_buffer_view *) &set->descriptors[layout->size]; + set->buffer_count = layout->buffer_count; /* Go through and fill out immutable samplers if we have any */ struct anv_descriptor *desc = set->descriptors; @@ -305,21 +403,24 @@ anv_descriptor_set_create(struct anv_device *device, desc += layout->binding[b].array_size; } - /* XXX: Use the pool */ - set->buffer_views = - anv_alloc(&device->alloc, - sizeof(set->buffer_views[0]) * layout->buffer_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set->buffer_views) { - anv_free(&device->alloc, set); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - + /* Allocate surface state for the buffer views. */ for (uint32_t b = 0; b < layout->buffer_count; b++) { - set->buffer_views[b].surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + struct surface_state_free_list_entry *entry = + pool->surface_state_free_list; + struct anv_state state; + + if (entry) { + state.map = entry; + state.offset = entry->offset; + state.alloc_size = 64; + pool->surface_state_free_list = entry->next; + } else { + state = anv_state_stream_alloc(&pool->surface_state_stream, 64, 64); + } + + set->buffer_views[b].surface_state = state; } - set->buffer_count = layout->buffer_count; + *out_set = set; return VK_SUCCESS; @@ -327,15 +428,27 @@ anv_descriptor_set_create(struct anv_device *device, void anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_pool *pool, struct anv_descriptor_set *set) { - /* XXX: Use the pool */ - for (uint32_t b = 0; b < set->buffer_count; b++) - anv_state_pool_free(&device->surface_state_pool, - set->buffer_views[b].surface_state); + /* Put the buffer view surface state back on the free list. */ + for (uint32_t b = 0; b < set->buffer_count; b++) { + struct surface_state_free_list_entry *entry = + set->buffer_views[b].surface_state.map; + entry->next = pool->surface_state_free_list; + pool->surface_state_free_list = entry; + } - anv_free(&device->alloc, set->buffer_views); - anv_free(&device->alloc, set); + /* Put the descriptor set allocation back on the free list. 
*/ + const uint32_t index = (char *) set - pool->data; + if (index + set->size == pool->next) { + pool->next = index; + } else { + struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set; + entry->next = pool->free_list; + entry->size = set->size; + pool->free_list = (char *) entry - pool->data; + } } VkResult anv_AllocateDescriptorSets( @@ -344,6 +457,7 @@ VkResult anv_AllocateDescriptorSets( VkDescriptorSet* pDescriptorSets) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, pAllocateInfo->descriptorPool); VkResult result = VK_SUCCESS; struct anv_descriptor_set *set; @@ -353,7 +467,7 @@ VkResult anv_AllocateDescriptorSets( ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]); - result = anv_descriptor_set_create(device, layout, &set); + result = anv_descriptor_set_create(device, pool, layout, &set); if (result != VK_SUCCESS) break; @@ -374,11 +488,12 @@ VkResult anv_FreeDescriptorSets( const VkDescriptorSet* pDescriptorSets) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); for (uint32_t i = 0; i < count; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - anv_descriptor_set_destroy(device, set); + anv_descriptor_set_destroy(device, pool, set); } return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c index 82944ea1a92..683a1623cc3 100644 --- a/src/intel/vulkan/anv_meta.c +++ b/src/intel/vulkan/anv_meta.c @@ -138,6 +138,27 @@ anv_device_init_meta(struct anv_device *device) .pfnFree = meta_free, }; + const VkDescriptorPoolCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }; + + result = anv_CreateDescriptorPool(anv_device_to_handle(device), + &create_info, + &device->meta_state.alloc, + &device->meta_state.desc_pool); + if (result != VK_SUCCESS) + goto fail_desc_pool; + result = anv_device_init_meta_clear_state(device); if (result != VK_SUCCESS) goto fail_clear; @@ -157,6 +178,10 @@ fail_blit: fail_resolve: anv_device_finish_meta_clear_state(device); fail_clear: + anv_DestroyDescriptorPool(anv_device_to_handle(device), + device->meta_state.desc_pool, + &device->meta_state.alloc); +fail_desc_pool: return result; } diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 06f13ecc8db..9c6cd8c510e 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -165,7 +165,6 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VkFilter blit_filter) { struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; struct blit_vb_data { float pos[2]; @@ -248,7 +247,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, anv_AllocateDescriptorSets(anv_device_to_handle(device), &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool, + .descriptorPool = device->meta_state.desc_pool, .descriptorSetCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout }, &set); @@ -341,7 +340,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_ResetDescriptorPool(anv_device_to_handle(device), + device->meta_state.desc_pool, 0); anv_DestroySampler(anv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc); anv_DestroyFramebuffer(anv_device_to_handle(device), fb, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index ea5020c5f24..9a77d21452f 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -483,7 +483,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image *src_image = src_iview->image; - VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; const struct vertex_attrs vertex_data[3] = { { @@ -564,7 +563,7 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, anv_AllocateDescriptorSets(device_h, &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool_h, + .descriptorPool = device->meta_state.desc_pool, .descriptorSetCount = 1, .pSetLayouts = (VkDescriptorSetLayout[]) { device->meta_state.resolve.ds_layout, @@ -572,8 +571,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, }, &desc_set_h); - ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); - anv_UpdateDescriptorSets(device_h, /*writeCount*/ 1, (VkWriteDescriptorSet[]) { @@ -644,7 +641,8 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /* All objects below are consumed by the draw call. We may safely destroy * them. */ - anv_descriptor_set_destroy(device, desc_set); + anv_ResetDescriptorPool(anv_device_to_handle(device), + device->meta_state.desc_pool, 0); anv_DestroySampler(device_h, sampler_h, &cmd_buffer->pool->alloc); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 479f3826135..6ce3f02d1f7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -571,6 +571,8 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { VkAllocationCallbacks alloc; + VkDescriptorPool desc_pool; + /** * Use array element `i` for images with `2^i` samples. 
*/ @@ -959,18 +961,32 @@ struct anv_descriptor { struct anv_descriptor_set { const struct anv_descriptor_set_layout *layout; + uint32_t size; uint32_t buffer_count; struct anv_buffer_view *buffer_views; struct anv_descriptor descriptors[0]; }; +struct anv_descriptor_pool { + uint32_t size; + uint32_t next; + uint32_t free_list; + + struct anv_state_stream surface_state_stream; + void *surface_state_free_list; + + char data[0]; +}; + VkResult anv_descriptor_set_create(struct anv_device *device, + struct anv_descriptor_pool *pool, const struct anv_descriptor_set_layout *layout, struct anv_descriptor_set **out_set); void anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); struct anv_pipeline_binding { @@ -1839,6 +1855,7 @@ ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -- cgit v1.2.3 From 442dff8cf4c99d67e7258e376d38ec32b92a2fbf Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 17:13:46 -0800 Subject: anv/descriptor_set: Stop marking everything as having dynamic offsets --- src/intel/vulkan/anv_descriptor_set.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 718bc216f73..b439f2a0c36 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -218,8 +218,10 @@ VkResult anv_CreatePipelineLayout( layout->set[set].dynamic_offset_start = dynamic_offset_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index >= 0) - dynamic_offset_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + dynamic_offset_count += set_layout->binding[b].array_size; for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { if (set_layout->binding[b].stage[s].surface_index >= 0) layout->stage[s].has_dynamic_offsets = true; -- cgit v1.2.3 From ae619a035573a2d13fb49537ef8769c97688e77f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 19:19:00 -0800 Subject: anv/state: Replace a bunch of ANV_GEN with GEN_GEN --- src/intel/vulkan/gen8_state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index fdde705f0d6..2686bfa8f3c 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -44,7 +44,7 @@ genX(init_device_state)(struct anv_device *device) batch.end = (void *) cmds + sizeof(cmds); anv_batch_emit(&batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 .MaskBits = 3, #endif .PipelineSelection = _3D); @@ -93,7 +93,7 @@ genX(init_device_state)(struct anv_device *device) ._8xSample6YOffset = 0.9375, ._8xSample7XOffset = 0.9375, ._8xSample7YOffset = 0.0625, -#if ANV_GEN >= 9 +#if GEN_GEN >= 9 ._16xSample0XOffset = 0.5625, ._16xSample0YOffset = 0.5625, ._16xSample1XOffset = 0.4375, @@ -194,7 +194,7 @@ static const 
uint8_t anv_valign[] = { static void get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) { - #if ANV_GENx10 >= 90 + #if GEN_GEN >= 9 if (isl_tiling_is_std_y(surf->tiling) || surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { /* The hardware ignores the alignment values. Anyway, the surface's @@ -239,7 +239,7 @@ get_qpitch(const struct isl_surf *surf) default: unreachable(!"bad isl_surf_dim"); case ISL_SURF_DIM_1D: - #if ANV_GENx10 >= 90 + #if GEN_GEN >= 9 /* QPitch is usually expressed as rows of surface elements (where * a surface element is an compression block or a single surface * sample). Skylake 1D is an outlier. @@ -256,7 +256,7 @@ get_qpitch(const struct isl_surf *surf) #endif case ISL_SURF_DIM_2D: case ISL_SURF_DIM_3D: - #if ANV_GEN >= 9 + #if GEN_GEN >= 9 return isl_surf_get_array_pitch_el_rows(surf); #else /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch @@ -452,7 +452,7 @@ VkResult genX(CreateSampler)( .SamplerDisable = false, .TextureBorderColorMode = DX10OGL, .LODPreClampMode = CLAMP_MODE_OGL, -#if ANV_GEN == 8 +#if GEN_GEN == 8 .BaseMipLevel = 0.0, #endif .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], -- cgit v1.2.3 From bfbb238dea91b1c2bde4f2f3eb20d39c95da3850 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 21:39:14 -0800 Subject: anv/descriptor_set: Set descriptor type for immutable samplers --- src/intel/vulkan/anv_descriptor_set.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index b439f2a0c36..fe105b23f42 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -399,8 +399,17 @@ anv_descriptor_set_create(struct anv_device *device, struct anv_descriptor *desc = set->descriptors; for (uint32_t b = 0; b < layout->binding_count; b++) { if (layout->binding[b].immutable_samplers) { - for (uint32_t i = 0; i < layout->binding[b].array_size; i++) - desc[i].sampler = layout->binding[b].immutable_samplers[i]; + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) { + /* The type will get changed to COMBINED_IMAGE_SAMPLER in + * UpdateDescriptorSets if needed. However, if the descriptor + * set has an immutable sampler, UpdateDescriptorSets may never + * touch it, so we need to make sure it's 100% valid now. + */ + desc[i] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = layout->binding[b].immutable_samplers[i], + }; + } } desc += layout->binding[b].array_size; } -- cgit v1.2.3 From bd3db3d6655beeb3da817a96d524f537092e386b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2016 17:04:19 -0800 Subject: anv/meta: Allocate descriptor pools on-the-fly We can't use a global descriptor pool like we were because it's not thread-safe. For now, we'll allocate them on-the-fly and that should work fine. At some point in the future, we could do something where we stack-allocate them or allocate them out of one of the state streams.
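For illustration, the per-operation lifetime looks roughly like this at the API level (a minimal sketch using core Vulkan entry points rather than the driver-internal anv_* wrappers in the hunks below; error handling omitted):

    VkDescriptorPoolSize pool_size = {
       .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
       .descriptorCount = 1,
    };
    VkDescriptorPoolCreateInfo pool_info = {
       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
       .maxSets = 1,
       .poolSizeCount = 1,
       .pPoolSizes = &pool_size,
    };
    VkDescriptorPool pool;
    vkCreateDescriptorPool(device, &pool_info, NULL, &pool);

    /* ... allocate the single set from 'pool', write it, record the draw ... */

    /* Destroying the pool frees its sets, so no vkFreeDescriptorSets
     * call is needed. */
    vkDestroyDescriptorPool(device, pool, NULL);

Because each meta operation owns its own short-lived pool, no locking is needed between command buffers recorded on different threads.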
--- src/intel/vulkan/anv_meta.c | 25 ------------------------- src/intel/vulkan/anv_meta_blit.c | 23 ++++++++++++++++++++--- src/intel/vulkan/anv_meta_resolve.c | 22 +++++++++++++++++++--- src/intel/vulkan/anv_private.h | 2 -- 4 files changed, 39 insertions(+), 33 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c index 683a1623cc3..82944ea1a92 100644 --- a/src/intel/vulkan/anv_meta.c +++ b/src/intel/vulkan/anv_meta.c @@ -138,27 +138,6 @@ anv_device_init_meta(struct anv_device *device) .pfnFree = meta_free, }; - const VkDescriptorPoolCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1 - }, - } - }; - - result = anv_CreateDescriptorPool(anv_device_to_handle(device), - &create_info, - &device->meta_state.alloc, - &device->meta_state.desc_pool); - if (result != VK_SUCCESS) - goto fail_desc_pool; - result = anv_device_init_meta_clear_state(device); if (result != VK_SUCCESS) goto fail_clear; @@ -178,10 +157,6 @@ fail_blit: fail_resolve: anv_device_finish_meta_clear_state(device); fail_clear: - anv_DestroyDescriptorPool(anv_device_to_handle(device), - device->meta_state.desc_pool, - &device->meta_state.alloc); -fail_desc_pool: return result; } diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 9c6cd8c510e..8ef943aa512 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -243,14 +243,31 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, .minFilter = blit_filter, }, &cmd_buffer->pool->alloc, &sampler); + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(anv_device_to_handle(device), + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + VkDescriptorSet set; anv_AllocateDescriptorSets(anv_device_to_handle(device), &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = device->meta_state.desc_pool, + .descriptorPool = desc_pool, .descriptorSetCount = 1, .pSetLayouts = &device->meta_state.blit.ds_layout }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), 1, /* writeCount */ (VkWriteDescriptorSet[]) { @@ -340,8 +357,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_ResetDescriptorPool(anv_device_to_handle(device), - device->meta_state.desc_pool, 0); + anv_DestroyDescriptorPool(anv_device_to_handle(device), + desc_pool, &cmd_buffer->pool->alloc); anv_DestroySampler(anv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc); anv_DestroyFramebuffer(anv_device_to_handle(device), fb, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 9a77d21452f..8eb2548b5ae 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -559,11 +559,27 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, &cmd_buffer->pool->alloc, &sampler_h); + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(anv_device_to_handle(device), + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + VkDescriptorSet desc_set_h; anv_AllocateDescriptorSets(device_h, &(VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = device->meta_state.desc_pool, + .descriptorPool = desc_pool, .descriptorSetCount = 1, .pSetLayouts = (VkDescriptorSetLayout[]) { device->meta_state.resolve.ds_layout, @@ -641,8 +657,8 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /* All objects below are consumed by the draw call. We may safely destroy * them. */ - anv_ResetDescriptorPool(anv_device_to_handle(device), - device->meta_state.desc_pool, 0); + anv_DestroyDescriptorPool(anv_device_to_handle(device), + desc_pool, &cmd_buffer->pool->alloc); anv_DestroySampler(device_h, sampler_h, &cmd_buffer->pool->alloc); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6ce3f02d1f7..b1b4d265b89 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -571,8 +571,6 @@ void anv_finish_wsi(struct anv_instance *instance); struct anv_meta_state { VkAllocationCallbacks alloc; - VkDescriptorPool desc_pool; - /** * Use array element `i` for images with `2^i` samples. */ -- cgit v1.2.3 From f0f7cc22f3f061416c81cf80ccbe4a6a390082a7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2016 21:23:06 -0800 Subject: anv/descriptor_set: Use the correct size for the descriptor pool The descriptor sizes array gives the total number of each type of descriptor that will ever be allocated from the pool, not the total amount that may be in any particular set. In our case, this simply means that we have to sum a bunch of things up and there we go. 
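A sketch of the resulting sizing rule (names as in the hunk below; buffer_count is accumulated in the same loop, counting only the buffer-backed descriptor types):

    /* pPoolSizes counts descriptors for the whole pool, not per set,
     * so each count is summed exactly once. */
    uint32_t descriptor_count = 0;
    for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++)
       descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;

    const size_t size =
       sizeof(struct anv_descriptor_pool) +
       pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
       descriptor_count * sizeof(struct anv_descriptor) +
       buffer_count * sizeof(struct anv_buffer_view);

For example, maxSets = 8 with pool sizes of 16 uniform buffers and 16 samplers reserves room for 8 set headers plus 32 descriptors total, however they end up distributed across the sets.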
--- src/intel/vulkan/anv_descriptor_set.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index fe105b23f42..dd645c3effc 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -283,14 +283,11 @@ VkResult anv_CreateDescriptorPool( } } - const size_t set_size = - sizeof(struct anv_descriptor_set) + - descriptor_count * sizeof(struct anv_descriptor) + - buffer_count * sizeof(struct anv_buffer_view); - const size_t size = sizeof(*pool) + - pCreateInfo->maxSets * set_size; + pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) + + descriptor_count * sizeof(struct anv_descriptor) + + buffer_count * sizeof(struct anv_buffer_view); pool = anv_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); -- cgit v1.2.3 From 1024a66fc4ff34a03ecfdf3ec053cb874fb206fe Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 15:23:06 -0800 Subject: anv: Emit 3DSTATE_URB_* via a loop. Rather than keeping separate {vs,hs,ds,gs}_start fields, we now store an array indexed by the shader stage (MESA_SHADER_*). The 3DSTATE_URB_* commands are also sequentially numbered. This makes it easy to just emit them in a loop. This simplifies the code a little, and also will make it easier to add more credible HS and DS code later. --- src/intel/vulkan/anv_pipeline.c | 20 ++++++++++++++------ src/intel/vulkan/anv_private.h | 9 +++------ src/intel/vulkan/genX_pipeline_util.h | 26 +++++++------------------- 3 files changed, 24 insertions(+), 31 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 27872d2769a..df78362ee70 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -870,13 +870,21 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) * - VS * - GS */ - pipeline->urb.vs_start = push_constant_chunks; - pipeline->urb.vs_size = vs_size; - pipeline->urb.nr_vs_entries = nr_vs_entries; + pipeline->urb.start[MESA_SHADER_VERTEX] = push_constant_chunks; + pipeline->urb.size[MESA_SHADER_VERTEX] = vs_size; + pipeline->urb.entries[MESA_SHADER_VERTEX] = nr_vs_entries; - pipeline->urb.gs_start = push_constant_chunks + vs_chunks; - pipeline->urb.gs_size = gs_size; - pipeline->urb.nr_gs_entries = nr_gs_entries; + pipeline->urb.start[MESA_SHADER_GEOMETRY] = push_constant_chunks + vs_chunks; + pipeline->urb.size[MESA_SHADER_GEOMETRY] = gs_size; + pipeline->urb.entries[MESA_SHADER_GEOMETRY] = nr_gs_entries; + + pipeline->urb.start[MESA_SHADER_TESS_CTRL] = push_constant_chunks; + pipeline->urb.size[MESA_SHADER_TESS_CTRL] = 1; + pipeline->urb.entries[MESA_SHADER_TESS_CTRL] = 0; + + pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks; + pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; + pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; } static void diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index b1b4d265b89..3e3cbf09a68 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1410,12 +1410,9 @@ struct anv_pipeline { uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { - uint32_t vs_start; - uint32_t vs_size; - uint32_t nr_vs_entries; - uint32_t gs_start; - uint32_t gs_size; - uint32_t nr_gs_entries; + uint32_t start[MESA_SHADER_GEOMETRY + 1]; + uint32_t size[MESA_SHADER_GEOMETRY + 1]; + uint32_t 
entries[MESA_SHADER_GEOMETRY + 1]; } urb; VkShaderStageFlags active_stages; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 51fbd8bf273..dea96a934b8 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -210,25 +210,13 @@ emit_urb_setup(struct anv_pipeline *pipeline) .ConstantBufferOffset = 8, .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + ._3DCommandSubOpcode = 48 + i, + .VSURBStartingAddress = pipeline->urb.start[i], + .VSURBEntryAllocationSize = pipeline->urb.size[i] - 1, + .VSNumberofURBEntries = pipeline->urb.entries[i]); + } } static inline uint32_t -- cgit v1.2.3 From 7f9b03cc8b44759895d5c4c42cfef8fa78269e7c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 15:46:23 -0800 Subject: anv: Emit 3DSTATE_PUSH_CONSTANT_ALLOC_* via a loop. Now we're emitting HS and DS packets as well. 
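This works because the five 3DSTATE_PUSH_CONSTANT_ALLOC_* packets share a single layout and consecutive sub-opcodes, in the same order that gl_shader_stage enumerates the stages. A sketch of the mapping the "18 + i" expression in the hunk below relies on (sub-opcode values inferred from that expression):

    /* MESA_SHADER_VERTEX    = 0 -> sub-opcode 18 (..._ALLOC_VS)
     * MESA_SHADER_TESS_CTRL = 1 -> sub-opcode 19 (..._ALLOC_HS)
     * MESA_SHADER_TESS_EVAL = 2 -> sub-opcode 20 (..._ALLOC_DS)
     * MESA_SHADER_GEOMETRY  = 3 -> sub-opcode 21 (..._ALLOC_GS)
     * MESA_SHADER_FRAGMENT  = 4 -> sub-opcode 22 (..._ALLOC_PS)
     * so the VS pack template can stand in for all five stages: */
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS),
                   ._3DCommandSubOpcode = 18 + i,
                   .ConstantBufferOffset = push_start,
                   .ConstantBufferSize = pipeline->urb.push_size[i]);

The previous commit's "48 + i" over 3DSTATE_URB_VS plays the same trick for the four URB packets.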
--- src/intel/vulkan/anv_pipeline.c | 6 ++++++ src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/genX_pipeline_util.h | 17 ++++++++--------- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index df78362ee70..672640ac24c 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -885,6 +885,12 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks; pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; + + pipeline->urb.push_size[MESA_SHADER_VERTEX] = 4; + pipeline->urb.push_size[MESA_SHADER_TESS_CTRL] = 0; + pipeline->urb.push_size[MESA_SHADER_TESS_EVAL] = 0; + pipeline->urb.push_size[MESA_SHADER_GEOMETRY] = 4; + pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = 4; } static void diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3e3cbf09a68..041ad87f75d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1410,6 +1410,7 @@ struct anv_pipeline { uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { + uint8_t push_size[MESA_SHADER_FRAGMENT + 1]; uint32_t start[MESA_SHADER_GEOMETRY + 1]; uint32_t size[MESA_SHADER_GEOMETRY + 1]; uint32_t entries[MESA_SHADER_GEOMETRY + 1]; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index dea96a934b8..cf4e0358741 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -200,15 +200,14 @@ emit_urb_setup(struct anv_pipeline *pipeline) .Address = { &device->workaround_bo, 0 }); #endif - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); + unsigned push_start = 0; + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + ._3DCommandSubOpcode = 18 + i, + .ConstantBufferOffset = push_start, + .ConstantBufferSize = pipeline->urb.push_size[i]); + push_start += pipeline->urb.push_size[i]; + } for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), -- cgit v1.2.3 From 3f115177302d1a969181649fde8c2332563aac73 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 17:26:15 -0800 Subject: anv: Properly size the push constant L3 area. We were assuming it was 32kB everywhere, reducing the available URB space. It's actually 16kB on Ivybridge, Baytrail, and Haswell GT1-2. 
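Stated as a runtime function instead of the preprocessor ladder the patch uses (each gen is compiled separately, hence the #if in the hunk below; devinfo fields as in brw_device_info):

    static unsigned
    push_constant_kb(const struct brw_device_info *devinfo)
    {
       if (devinfo->gen >= 8)
          return 32;                           /* BDW, SKL */
       else if (devinfo->is_haswell)
          return devinfo->gt == 3 ? 32 : 16;   /* HSW GT3 vs. GT1-2 */
       else
          return 16;                           /* IVB, BYT */
    }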
--- src/intel/vulkan/anv_pipeline.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 672640ac24c..6c8d4add6e8 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -755,8 +755,6 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, return VK_SUCCESS; } -static const int gen8_push_size = 32 * 1024; - static void gen7_compute_urb_partition(struct anv_pipeline *pipeline) { @@ -785,7 +783,14 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; /* Reserve space for push constants */ - unsigned push_constant_bytes = gen8_push_size; +#if GEN_GEN >= 8 + unsigned push_constant_kb = 32; +#elif GEN_IS_HASWELL + unsigned push_constant_kb = pipeline->device->info.gt == 3 ? 32 : 16; +#else + unsigned push_constant_kb = 16; +#endif + unsigned push_constant_bytes = push_constant_kb * 1024; unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; -- cgit v1.2.3 From 3ecd357d816dc71b2c6ebd6ace38c76ebb25674e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 22 Feb 2016 17:28:22 -0800 Subject: anv: Allocate more push constant space. Previously we allocated 4kB of push constant space for VS, GS, and PS (for a total of 12kB) no matter what. This works, but doesn't fully utilize the space - we have 16kB or 32kB of space. This makes anv use the same method as brw - divide up the space evenly among all active shader stages. This means HS and DS would get space, if those shader stages existed. In the future, we can probably do better by inspecting how many push constants each shader stage uses, and weight things accordingly. But this is strictly better than the old code, and ideally we'd justify a fancier solution with actual performance data. --- src/intel/vulkan/anv_pipeline.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 6c8d4add6e8..92c5c35699c 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -891,11 +891,17 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; - pipeline->urb.push_size[MESA_SHADER_VERTEX] = 4; - pipeline->urb.push_size[MESA_SHADER_TESS_CTRL] = 0; - pipeline->urb.push_size[MESA_SHADER_TESS_EVAL] = 0; - pipeline->urb.push_size[MESA_SHADER_GEOMETRY] = 4; - pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = 4; + const unsigned stages = + _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); + const unsigned size_per_stage = push_constant_kb / stages; + + for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { + pipeline->urb.push_size[i] = + (pipeline->active_stages & (1 << i)) ? 
size_per_stage : 1; + } + + pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = + push_constant_kb - size_per_stage * (stages - 1); } static void -- cgit v1.2.3 From 25c2470b24ce8411f6747eb887137b2511b6d529 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 11 Feb 2016 21:11:48 -0800 Subject: anv: Set max_hs_threads/max_ds_threads --- src/intel/vulkan/anv_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 92c5c35699c..1173b4f0cba 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -420,8 +420,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = 0, - [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads, + [MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads, [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, -- cgit v1.2.3 From c32273d246e8bf46924d8852d1b3fd1d34194df2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 25 Feb 2016 08:52:35 -0800 Subject: anv/device: Properly handle apiVersion == 0 From the Vulkan 1.0 spec section 3.2: "If apiVersion is 0 the implementation must ignore it" --- src/intel/vulkan/anv_device.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 7a5cb234ac5..59930552f59 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -214,9 +214,14 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - uint32_t client_version = pCreateInfo->pApplicationInfo ? 
- pCreateInfo->pApplicationInfo->apiVersion : - VK_MAKE_VERSION(1, 0, 0); + uint32_t client_version; + if (pCreateInfo->pApplicationInfo && + pCreateInfo->pApplicationInfo->apiVersion != 0) { + client_version = pCreateInfo->pApplicationInfo->apiVersion; + } else { + client_version = VK_MAKE_VERSION(1, 0, 0); + } + if (VK_MAKE_VERSION(1, 0, 0) > client_version || client_version > VK_MAKE_VERSION(1, 0, 3)) { return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, -- cgit v1.2.3 From b7bc52b5b18ac3f2d1e4ce399a701f4d272e7439 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 12:37:04 -0800 Subject: anv/gen8: Emit the 3DSTATE_PS_BLEND packet --- src/intel/vulkan/gen8_pipeline.c | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index dc15e2066c5..c9545c898f3 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -111,6 +111,7 @@ emit_cb_state(struct anv_pipeline *pipeline, .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, }; + bool has_writeable_rt = false; for (uint32_t i = 0; i < info->attachmentCount; i++) { const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; @@ -140,6 +141,9 @@ emit_cb_state(struct anv_pipeline *pipeline, .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), }; + if (a->colorWriteMask != 0) + has_writeable_rt = true; + /* Our hardware applies the blend factor prior to the blend function * regardless of what function is used. Technically, this means the * hardware can do MORE than GL or Vulkan specify. However, it also @@ -165,6 +169,25 @@ emit_cb_state(struct anv_pipeline *pipeline, blend_state.Entry[i].WriteDisableBlue = true; } + if (info->attachmentCount > 0) { + struct GENX(BLEND_STATE_ENTRY) *bs = &blend_state.Entry[0]; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), + .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable, + .HasWriteableRT = has_writeable_rt, + .ColorBufferBlendEnable = bs->ColorBufferBlendEnable, + .SourceAlphaBlendFactor = bs->SourceAlphaBlendFactor, + .DestinationAlphaBlendFactor = + bs->DestinationAlphaBlendFactor, + .SourceBlendFactor = bs->SourceBlendFactor, + .DestinationBlendFactor = bs->DestinationBlendFactor, + .AlphaTestEnable = false, + .IndependentAlphaBlendEnable = + blend_state.IndependentAlphaBlendEnable); + } else { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND)); + } + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); if (!device->info.has_llc) anv_state_clflush(pipeline->blend_state); -- cgit v1.2.3 From 6bb6b5c341a5a75763d565565f164b63cff3388a Mon Sep 17 00:00:00 2001 From: Thomas Hindoe Paaboel Andersen Date: Thu, 25 Feb 2016 22:00:09 +0100 Subject: anv: remove stray ; after if Both logic and indentation suggest that the ; was not intended here.
Reviewed-by: Matt Turner Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index b060828cf61..827c3ed4142 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -465,7 +465,7 @@ void anv_CmdSetViewport( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); const uint32_t total_count = firstViewport + viewportCount; - if (cmd_buffer->state.dynamic.viewport.count < total_count); + if (cmd_buffer->state.dynamic.viewport.count < total_count) cmd_buffer->state.dynamic.viewport.count = total_count; memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, @@ -483,7 +483,7 @@ void anv_CmdSetScissor( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); const uint32_t total_count = firstScissor + scissorCount; - if (cmd_buffer->state.dynamic.scissor.count < total_count); + if (cmd_buffer->state.dynamic.scissor.count < total_count) cmd_buffer->state.dynamic.scissor.count = total_count; memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, -- cgit v1.2.3 From 452782f68b3e8d25538fbe65b942c0af7c3bb147 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 15:51:36 -0800 Subject: gen/genX_pack: Add genxml to the pack header path If you have an out-of-tree build, gen8_pack.h and friends will not be in the same folder as genX_pack.h so this will be a problem. We fixed out-of-tree earlier by adding the genxml folder to the includes for the vulkan driver. However, this is not a good long-term solution because we want to use it in ISL as well. --- src/intel/genxml/genX_pack.h | 8 ++++---- src/intel/vulkan/Makefile.am | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/genxml/genX_pack.h b/src/intel/genxml/genX_pack.h index 69fc340762b..7967c292645 100644 --- a/src/intel/genxml/genX_pack.h +++ b/src/intel/genxml/genX_pack.h @@ -28,13 +28,13 @@ #endif #if (GEN_VERSIONx10 == 70) -# include "gen7_pack.h" +# include "genxml/gen7_pack.h" #elif (GEN_VERSIONx10 == 75) -# include "gen75_pack.h" +# include "genxml/gen75_pack.h" #elif (GEN_VERSIONx10 == 80) -# include "gen8_pack.h" +# include "genxml/gen8_pack.h" #elif (GEN_VERSIONx10 == 90) -# include "gen9_pack.h" +# include "genxml/gen9_pack.h" #else # error "Need to add a pack header include for this gen" #endif diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 53cfa20a263..6be4f9fb427 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -66,7 +66,6 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/intel \ - -I$(top_builddir)/src/intel/genxml \ -I$(top_builddir)/src/vulkan libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init -- cgit v1.2.3 From 4a9b805ce55b495576627465b7cca034b468653a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 16:22:47 -0800 Subject: anv/device: Store the default MOCS in the device --- src/intel/vulkan/anv_private.h | 16 +++++++++------- src/intel/vulkan/gen7_state.c | 3 +++ src/intel/vulkan/gen8_state.c | 3 +++ 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 041ad87f75d..2f3a6597ac2 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h 
@@ -680,6 +680,8 @@ struct anv_device { struct anv_block_pool scratch_block_pool; + uint32_t default_mocs; + pthread_mutex_t mutex; }; @@ -859,19 +861,19 @@ __gen_combine_address(struct anv_batch *batch, void *location, .L3CacheabilityControlL3CC = 1, \ } -#define GEN8_MOCS { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ +#define GEN8_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ } /* Skylake: MOCS is now an index into an array of 62 different caching * configurations programmed by the kernel. */ -#define GEN9_MOCS { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ +#define GEN9_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 2 \ } #define GEN9_MOCS_PTE { \ diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index 5323c378d02..48c41faf57f 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -37,6 +37,9 @@ VkResult genX(init_device_state)(struct anv_device *device) { + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, + &GENX(MOCS)); + struct anv_batch batch; uint32_t cmds[64]; diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 2686bfa8f3c..6226eba43ec 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -37,6 +37,9 @@ VkResult genX(init_device_state)(struct anv_device *device) { + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, + &GENX(MOCS)); + struct anv_batch batch; uint32_t cmds[64]; -- cgit v1.2.3 From ded57c3ccaf79d139ec64ce6711f4de0fea681c7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 11:45:50 -0800 Subject: anv: Use ISL to fill out surface states --- src/intel/vulkan/anv_image.c | 150 +++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 71 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 0a412a3f8c6..11ceea38829 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -427,37 +427,6 @@ anv_validate_CreateImageView(VkDevice _device, return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); } -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - else - gen7_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 8: - gen8_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 9: - gen9_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - default: - unreachable("unsupported gen\n"); - } - - if (!device->info.has_llc) - anv_state_clflush(state); -} - static struct anv_state alloc_surface_state(struct anv_device *device, struct anv_cmd_buffer *cmd_buffer) @@ -479,7 +448,7 @@ has_matching_storage_typed_format(const struct anv_device *device, device->info.gen >= 9); } -static VkComponentSwizzle +static enum isl_channel_select remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle 
component, struct anv_format_swizzle format_swizzle) { @@ -488,17 +457,17 @@ remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, switch (swizzle) { case VK_COMPONENT_SWIZZLE_ZERO: - return VK_COMPONENT_SWIZZLE_ZERO; + return ISL_CHANNEL_SELECT_ZERO; case VK_COMPONENT_SWIZZLE_ONE: - return VK_COMPONENT_SWIZZLE_ONE; + return ISL_CHANNEL_SELECT_ONE; case VK_COMPONENT_SWIZZLE_R: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + return ISL_CHANNEL_SELECT_RED + format_swizzle.r; case VK_COMPONENT_SWIZZLE_G: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + return ISL_CHANNEL_SELECT_RED + format_swizzle.g; case VK_COMPONENT_SWIZZLE_B: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + return ISL_CHANNEL_SELECT_RED + format_swizzle.b; case VK_COMPONENT_SWIZZLE_A: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + return ISL_CHANNEL_SELECT_RED + format_swizzle.a; default: unreachable("Invalid swizzle"); } @@ -513,8 +482,6 @@ anv_image_view_init(struct anv_image_view *iview, { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - VkImageViewCreateInfo mCreateInfo; - memcpy(&mCreateInfo, pCreateInfo, sizeof(VkImageViewCreateInfo)); assert(range->layerCount > 0); assert(range->baseMipLevel < image->levels); @@ -549,18 +516,30 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_format_swizzle swizzle; iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, image->tiling, &swizzle); - iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R, swizzle); - iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G, swizzle); - iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B, swizzle); - iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A, swizzle); iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; + struct isl_view isl_view = { + .format = iview->format, + .base_level = range->baseMipLevel, + .levels = range->levelCount, + .base_array_layer = range->baseArrayLayer, + .array_len = range->layerCount, + .channel_select = { + remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R, swizzle), + remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G, swizzle), + remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B, swizzle), + remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A, swizzle), + }, + }; + + struct isl_extent4d level0_extent_px; + if (!isl_format_is_compressed(iview->format) && isl_format_is_compressed(image->format->isl_format)) { /* Scale the ImageView extent by the backing Image. 
This is used @@ -570,31 +549,46 @@ anv_image_view_init(struct anv_image_view *iview, */ const struct isl_format_layout * isl_layout = image->format->isl_layout; - iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); - iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + level0_extent_px.depth = anv_minify(image->extent.depth, range->baseMipLevel); + level0_extent_px.depth = DIV_ROUND_UP(level0_extent_px.depth, isl_layout->bd); - iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; - iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); - mCreateInfo.subresourceRange.baseMipLevel = 0; - mCreateInfo.subresourceRange.baseArrayLayer = 0; + level0_extent_px.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; + level0_extent_px.width = isl_surf_get_row_pitch_el(&surface->isl); + isl_view.base_level = 0; + isl_view.base_array_layer = 0; } else { - iview->level_0_extent.width = image->extent.width; - iview->level_0_extent.height = image->extent.height; - iview->level_0_extent.depth = image->extent.depth; + level0_extent_px.width = image->extent.width; + level0_extent_px.height = image->extent.height; + level0_extent_px.depth = image->extent.depth; } iview->extent = (VkExtent3D) { - .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), - .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), - .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), + .width = anv_minify(image->extent.width , range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth , range->baseMipLevel), }; + isl_surf_usage_flags_t cube_usage; + if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE || + pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) { + cube_usage = ISL_SURF_USAGE_CUBE_BIT; + } else { + cube_usage = 0; + } + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->sampler_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); + isl_view.usage = cube_usage | ISL_SURF_USAGE_TEXTURE_BIT; + isl_surf_fill_state(&device->isl_dev, + iview->sampler_surface_state.map, + .surf = &surface->isl, + .view = &isl_view, + .mocs = device->default_mocs, + .level0_extent_px = level0_extent_px); + + if (!device->info.has_llc) + anv_state_clflush(iview->sampler_surface_state); } else { iview->sampler_surface_state.alloc_size = 0; } @@ -602,9 +596,16 @@ anv_image_view_init(struct anv_image_view *iview, if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - anv_fill_image_surface_state(device, iview->color_rt_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + isl_view.usage = cube_usage | ISL_SURF_USAGE_RENDER_TARGET_BIT; + isl_surf_fill_state(&device->isl_dev, + iview->color_rt_surface_state.map, + .surf = &surface->isl, + .view = &isl_view, + .mocs = device->default_mocs, + .level0_extent_px = level0_extent_px); + + if (!device->info.has_llc) + anv_state_clflush(iview->color_rt_surface_state); } else { iview->color_rt_surface_state.alloc_size = 0; } @@ -612,16 +613,23 @@ anv_image_view_init(struct anv_image_view *iview, if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = 
alloc_surface_state(device, cmd_buffer); - if (has_matching_storage_typed_format(device, iview->format)) - anv_fill_image_surface_state(device, iview->storage_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - else + if (has_matching_storage_typed_format(device, iview->format)) { + isl_view.usage = cube_usage | ISL_SURF_USAGE_STORAGE_BIT; + isl_surf_fill_state(&device->isl_dev, + iview->storage_surface_state.map, + .surf = &surface->isl, + .view = &isl_view, + .mocs = device->default_mocs, + .level0_extent_px = level0_extent_px); + } else { anv_fill_buffer_surface_state(device, iview->storage_surface_state, ISL_FORMAT_RAW, iview->offset, iview->bo->size - iview->offset, 1); + } + if (!device->info.has_llc) + anv_state_clflush(iview->storage_surface_state); } else { iview->storage_surface_state.alloc_size = 0; } -- cgit v1.2.3 From b70a8d40fa1bdb21376b96534c846ba8c1c82878 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 21:39:46 -0800 Subject: anv/state: Remove unused fill_surface_state functions --- src/intel/vulkan/gen7_state.c | 111 ---------------- src/intel/vulkan/gen8_state.c | 261 ------------------------------------- src/intel/vulkan/genX_state_util.h | 47 ------- 3 files changed, 419 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index 48c41faf57f..ac5b5ed21cd 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -154,114 +154,3 @@ VkResult genX(CreateSampler)( return VK_SUCCESS; } - -static const uint8_t anv_halign[] = { - [4] = HALIGN_4, - [8] = HALIGN_8, -}; - -static const uint8_t anv_valign[] = { - [2] = VALIGN_2, - [4] = VALIGN_4, -}; - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - uint32_t depth = 1; - if (range->layerCount > 1) { - depth = range->layerCount; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(&surface->isl); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, - usage == VK_IMAGE_USAGE_STORAGE_BIT), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], - .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], - - /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if - * Tiled Surface is False." - */ - .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, - .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? 
- TILEWALK_YMAJOR : TILEWALK_XMAJOR, - - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - - .RenderCacheReadWriteMode = 0, /* TEMPLATE */ - - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->isl.row_pitch - 1, - .MinimumArrayElement = range->baseArrayLayer, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - .SurfaceObjectControlState = GENX(MOCS), - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .MCSEnable = false, -# if (GEN_IS_HASWELL) - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], -# else /* XXX: Seriously? */ - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, -# endif - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 6226eba43ec..1ecd34058d9 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -139,13 +139,6 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -static const uint32_t -isl_to_gen_multisample_layout[] = { - [ISL_MSAA_LAYOUT_NONE] = MSS, - [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, - [ISL_MSAA_LAYOUT_ARRAY] = MSS, -}; - void genX(fill_buffer_surface_state)(void *state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) @@ -178,260 +171,6 @@ genX(fill_buffer_surface_state)(void *state, enum isl_format format, GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); } -static const uint8_t anv_halign[] = { - [4] = HALIGN4, - [8] = HALIGN8, - [16] = HALIGN16, -}; - -static const uint8_t anv_valign[] = { - [4] = VALIGN4, - [8] = VALIGN8, - [16] = VALIGN16, -}; - -/** - * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment - * and SurfaceVerticalAlignment. - */ -static void -get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) -{ - #if GEN_GEN >= 9 - if (isl_tiling_is_std_y(surf->tiling) || - surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { - /* The hardware ignores the alignment values. Anyway, the surface's - * true alignment is likely outside the enum range of HALIGN* and - * VALIGN*. - */ - *halign = 0; - *valign = 0; - } else { - /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units - * of surface elements (not pixels nor samples). For compressed formats, - * a "surface element" is defined as a compression block. 
For example, - * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 - * format (ETC2 has a block height of 4), then the vertical alignment is - * 4 compression blocks or, equivalently, 16 pixels. - */ - struct isl_extent3d image_align_el - = isl_surf_get_image_alignment_el(surf); - - *halign = anv_halign[image_align_el.width]; - *valign = anv_valign[image_align_el.height]; - } - #else - /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in - * units of surface samples. For example, if SurfaceVerticalAlignment - * is VALIGN_4 and the surface is singlesampled, then for any surface - * format (compressed or not) the vertical alignment is - * 4 pixels. - */ - struct isl_extent3d image_align_sa - = isl_surf_get_image_alignment_sa(surf); - - *halign = anv_halign[image_align_sa.width]; - *valign = anv_valign[image_align_sa.height]; - #endif -} - -static uint32_t -get_qpitch(const struct isl_surf *surf) -{ - switch (surf->dim) { - default: - unreachable(!"bad isl_surf_dim"); - case ISL_SURF_DIM_1D: - #if GEN_GEN >= 9 - /* QPitch is usually expressed as rows of surface elements (where - * a surface element is an compression block or a single surface - * sample). Skylake 1D is an outlier. - * - * From the Skylake BSpec >> Memory Views >> Common Surface - * Formats >> Surface Layout and Tiling >> 1D Surfaces: - * - * Surface QPitch specifies the distance in pixels between array - * slices. - */ - return isl_surf_get_array_pitch_el(surf); - #else - return isl_surf_get_array_pitch_el_rows(surf); - #endif - case ISL_SURF_DIM_2D: - case ISL_SURF_DIM_3D: - #if GEN_GEN >= 9 - return isl_surf_get_array_pitch_el_rows(surf); - #else - /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch - * - * "This field must be set to an integer multiple of the Surface - * Vertical Alignment. For compressed textures (BC*, FXT1, - * ETC*, and EAC* Surface Formats), this field is in units of - * rows in the uncompressed surface, and must be set to an - * integer multiple of the vertical alignment parameter "j" - * defined in the Common Surface Formats section." 
- */ - return isl_surf_get_array_pitch_sa_rows(surf); - #endif - } -} - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - static const uint8_t isl_to_gen_tiling[] = { - [ISL_TILING_LINEAR] = LINEAR, - [ISL_TILING_X] = XMAJOR, - [ISL_TILING_Y0] = YMAJOR, - [ISL_TILING_Yf] = YMAJOR, - [ISL_TILING_Ys] = YMAJOR, - [ISL_TILING_W] = WMAJOR, - }; - - uint32_t halign, valign; - get_halign_valign(&surface->isl, &halign, &valign); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, is_storage), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = valign, - .SurfaceHorizontalAlignment = halign, - .TileMode = isl_to_gen_tiling[surface->isl.tiling], - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .CubeFaceEnablePositiveZ = 1, - .CubeFaceEnableNegativeZ = 1, - .CubeFaceEnablePositiveY = 1, - .CubeFaceEnableNegativeY = 1, - .CubeFaceEnablePositiveX = 1, - .CubeFaceEnableNegativeX = 1, - .MemoryObjectControlState = GENX(MOCS), - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". - */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, - .Height = iview->level_0_extent.height - 1, - .Width = iview->level_0_extent.width - 1, - .Depth = 0, /* TEMPLATE */ - .SurfacePitch = surface->isl.row_pitch - 1, - .RenderTargetViewExtent = 0, /* TEMPLATE */ - .MinimumArrayElement = 0, /* TEMPLATE */ - .MultisampledSurfaceStorageFormat = - isl_to_gen_multisample_layout[surface->isl.msaa_layout], - .NumberofMultisamples = ffs(surface->isl.samples) - 1, - .MultisamplePositionPaletteIndex = 0, /* UNUSED */ - .XOffset = 0, - .YOffset = 0, - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - switch (template.SurfaceType) { - case SURFTYPE_1D: - case SURFTYPE_2D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced - * by one for each increase from zero of Minimum Array Element. For - * example, if Minimum Array Element is set to 1024 on a 2D surface, - * the range of this field is reduced to [0,1023]. 
- * - * In other words, 'Depth' is the number of array layers. - */ - template.Depth = range->layerCount - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 1D and 2D Surfaces: - * This field must be set to the same value as the Depth field. - */ - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_CUBE: - template.MinimumArrayElement = range->baseArrayLayer; - /* Same as SURFTYPE_2D, but divided by 6 */ - template.Depth = range->layerCount / 6 - 1; - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_3D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * If the volume texture is MIP-mapped, this field specifies the - * depth of the base MIP level. - */ - template.Depth = image->extent.depth - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 3D Surfaces: This field - * indicates the extent of the accessible 'R' coordinates minus 1 on - * the LOD currently being rendered to. - */ - template.RenderTargetViewExtent = iview->extent.depth - 1; - break; - default: - unreachable(!"bad SurfaceType"); - } - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h index 10b3a9f42c5..aabcea9c183 100644 --- a/src/intel/vulkan/genX_state_util.h +++ b/src/intel/vulkan/genX_state_util.h @@ -21,53 +21,6 @@ * IN THE SOFTWARE. */ -static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type, - bool storage) -{ - switch (view_type) { - default: - unreachable("bad VkImageViewType"); - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_1D); - return SURFTYPE_1D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return storage ? 
SURFTYPE_2D : SURFTYPE_CUBE; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_2D; - case VK_IMAGE_VIEW_TYPE_3D: - assert(image->type == VK_IMAGE_TYPE_3D); - return SURFTYPE_3D; - } -} - -static enum isl_format -anv_surface_format(const struct anv_device *device, enum isl_format format, - bool storage) -{ - if (storage) { - return isl_lower_storage_image_format(&device->isl_dev, format); - } else { - return format; - } -} - -#if GEN_GEN > 7 || GEN_IS_HASWELL -static const uint32_t vk_to_gen_swizzle[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, - [VK_COMPONENT_SWIZZLE_R] = SCS_RED, - [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, - [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, - [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA -}; -#endif - static inline uint32_t vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) { -- cgit v1.2.3 From 9d5b8f7709d7cce1493cc0b38c750ad1173f7327 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 20 Feb 2016 21:40:25 -0800 Subject: anv: Remove unneeded fields from anv_image_view --- src/intel/vulkan/anv_image.c | 11 ++++++----- src/intel/vulkan/anv_private.h | 3 --- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 11ceea38829..145db6de039 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -514,14 +514,15 @@ anv_image_view_init(struct anv_image_view *iview, iview->vk_format = pCreateInfo->format; struct anv_format_swizzle swizzle; - iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, - image->tiling, &swizzle); + enum isl_format format = anv_get_isl_format(pCreateInfo->format, + range->aspectMask, + image->tiling, &swizzle); iview->base_layer = range->baseArrayLayer; iview->base_mip = range->baseMipLevel; struct isl_view isl_view = { - .format = iview->format, + .format = format, .base_level = range->baseMipLevel, .levels = range->levelCount, .base_array_layer = range->baseArrayLayer, @@ -540,7 +541,7 @@ anv_image_view_init(struct anv_image_view *iview, struct isl_extent4d level0_extent_px; - if (!isl_format_is_compressed(iview->format) && + if (!isl_format_is_compressed(format) && isl_format_is_compressed(image->format->isl_format)) { /* Scale the ImageView extent by the backing Image. This is used * internally when an uncompressed ImageView is created on a @@ -613,7 +614,7 @@ anv_image_view_init(struct anv_image_view *iview, if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - if (has_matching_storage_typed_format(device, iview->format)) { + if (has_matching_storage_typed_format(device, format)) { isl_view.usage = cube_usage | ISL_SURF_USAGE_STORAGE_BIT; isl_surf_fill_state(&device->isl_dev, iview->storage_surface_state.map, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 2f3a6597ac2..e50a6dbdbfe 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1647,11 +1647,8 @@ struct anv_image_view { VkImageAspectFlags aspect_mask; VkFormat vk_format; - VkComponentMapping swizzle; - enum isl_format format; uint32_t base_layer; uint32_t base_mip; - VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ /** RENDER_SURFACE_STATE when using image as a color render target.
*/ -- cgit v1.2.3 From eb19d640eb46249ba20734aa478c84fadfc35d80 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 22 Feb 2016 16:54:25 -0800 Subject: anv: Use isl to fill buffer surface states --- src/intel/vulkan/anv_device.c | 24 ++++++------------------ src/intel/vulkan/gen7_state.c | 31 ------------------------------- src/intel/vulkan/gen8_state.c | 32 -------------------------------- 3 files changed, 6 insertions(+), 81 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 59930552f59..857c4b1d1f4 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1697,24 +1697,12 @@ anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, enum isl_format format, uint32_t offset, uint32_t range, uint32_t stride) { - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_buffer_surface_state(state.map, format, offset, range, - stride); - else - gen7_fill_buffer_surface_state(state.map, format, offset, range, - stride); - break; - case 8: - gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - case 9: - gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - default: - unreachable("unsupported gen\n"); - } + isl_buffer_fill_state(&device->isl_dev, state.map, + .address = offset, + .mocs = device->default_mocs, + .size = range, + .format = format, + .stride = stride); if (!device->info.has_llc) anv_state_clflush(state); diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c index ac5b5ed21cd..1360697f0de 100644 --- a/src/intel/vulkan/gen7_state.c +++ b/src/intel/vulkan/gen7_state.c @@ -63,37 +63,6 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN_4, - .SurfaceHorizontalAlignment = HALIGN_4, - .TiledSurface = false, - .RenderCacheReadWriteMode = false, - .SurfaceObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, -# if (GEN_IS_HASWELL) - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, -# endif - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c index 1ecd34058d9..784269b98cb 100644 --- a/src/intel/vulkan/gen8_state.c +++ b/src/intel/vulkan/gen8_state.c @@ -139,38 +139,6 @@ genX(init_device_state)(struct anv_device *device) return anv_device_submit_simple_batch(device, &batch); } -void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = format, - .SurfaceVerticalAlignment 
= VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - /* FIXME: We assume that the image must be bound at this time. */ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - VkResult genX(CreateSampler)( VkDevice _device, const VkSamplerCreateInfo* pCreateInfo, -- cgit v1.2.3 From e9d126f23b66751ae644c3125668ecf5d1e0f86b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 19:49:12 -0800 Subject: anv/image: Add a usage_mask field to image_view_init This allows us to avoid doing some unneeded work on the meta paths where we know that the image view will be used for exactly one thing. The meta paths also sometimes do things that aren't quite valid like setting the array slice on a 3-D texture and we want to limit the number of paths that need to be able to sensibly handle the lies. --- src/intel/vulkan/anv_image.c | 11 ++++++----- src/intel/vulkan/anv_meta_blit.c | 20 ++++++++++---------- src/intel/vulkan/anv_meta_clear.c | 2 +- src/intel/vulkan/anv_meta_resolve.c | 4 ++-- src/intel/vulkan/anv_private.h | 3 ++- 5 files changed, 21 insertions(+), 19 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 145db6de039..4caab5804ae 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -478,7 +478,8 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset) + uint32_t offset, + VkImageUsageFlags usage_mask) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; @@ -577,7 +578,7 @@ anv_image_view_init(struct anv_image_view *iview, cube_usage = 0; } - if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (image->usage & usage_mask & VK_IMAGE_USAGE_SAMPLED_BIT) { iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); isl_view.usage = cube_usage | ISL_SURF_USAGE_TEXTURE_BIT; @@ -594,7 +595,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->sampler_surface_state.alloc_size = 0; } - if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (image->usage & usage_mask & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); isl_view.usage = cube_usage | ISL_SURF_USAGE_RENDER_TARGET_BIT; @@ -611,7 +612,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state.alloc_size = 0; } - if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (image->usage & usage_mask & VK_IMAGE_USAGE_STORAGE_BIT) { iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); if (has_matching_storage_typed_format(device, format)) { @@ -650,7 +651,7 @@ anv_CreateImageView(VkDevice _device, if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_image_view_init(view, device, pCreateInfo, NULL, 0); + anv_image_view_init(view, device,
pCreateInfo, NULL, 0, ~0); *pView = anv_image_view_to_handle(view); diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 8ef943aa512..2ec428b5f4a 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -451,7 +451,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -468,7 +468,7 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), @@ -711,7 +711,7 @@ void anv_CmdCopyImage( .layerCount = pRegions[r].dstSubresource.layerCount, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); const uint32_t dest_base_array_slice = anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, @@ -758,7 +758,7 @@ void anv_CmdCopyImage( .layerCount = 1 }, }, - cmd_buffer, img_o); + cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, &pRegions[r].extent); @@ -826,7 +826,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); const VkOffset3D dest_offset = { .x = pRegions[r].dstOffsets[0].x, @@ -876,7 +876,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, src_image, &src_iview, @@ -991,7 +991,7 @@ void anv_CmdCopyBufferToImage( .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); uint32_t img_x = 0; uint32_t img_y = 0; @@ -1025,7 +1025,7 @@ void anv_CmdCopyBufferToImage( .layerCount = 1 }, }, - cmd_buffer, img_o); + cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, &pRegions[r].imageExtent); @@ -1104,7 +1104,7 @@ void anv_CmdCopyImageToBuffer( .layerCount = pRegions[r].imageSubresource.layerCount, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image *dest_image = make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, @@ -1140,7 +1140,7 @@ void anv_CmdCopyImageToBuffer( .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, anv_image_from_handle(srcImage), diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 739ae09582c..227f8f35115 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -811,7 +811,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 8eb2548b5ae..19fb3ad3003 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -737,7 +737,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -754,7 +754,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0); + cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); 
VkFramebuffer fb_h; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index e50a6dbdbfe..05cb77c5509 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1680,7 +1680,8 @@ void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset); + uint32_t offset, + VkImageUsageFlags usage_mask); void anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, -- cgit v1.2.3 From 7363024cbda0c513ad76d22580ce363f30ae1eda Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 11:38:14 -0800 Subject: anv: Fill out image_param structs at view creation time --- src/intel/vulkan/anv_cmd_buffer.c | 6 ++---- src/intel/vulkan/anv_image.c | 5 +++++ src/intel/vulkan/anv_private.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 827c3ed4142..418a143b7bc 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -812,8 +812,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct brw_image_param *image_param = &cmd_buffer->state.push_constants[stage]->images[image++]; - anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, - image_param); + *image_param = desc->image_view->storage_image_param; image_param->surface_idx = bias + s; break; } @@ -838,8 +837,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct brw_image_param *image_param = &cmd_buffer->state.push_constants[stage]->images[image++]; - anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, - image_param); + *image_param = desc->buffer_view->storage_image_param; image_param->surface_idx = bias + s; break; diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 4caab5804ae..72f29f1dba8 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -630,6 +630,9 @@ anv_image_view_init(struct anv_image_view *iview, iview->bo->size - iview->offset, 1); } + anv_image_view_fill_image_param(device, iview, + &iview->storage_image_param); + if (!device->info.has_llc) anv_state_clflush(iview->storage_surface_state); } else { @@ -734,6 +737,8 @@ anv_CreateBufferView(VkDevice _device, (storage_format == ISL_FORMAT_RAW ? 1 : format->isl_layout->bs)); + anv_buffer_view_fill_image_param(device, view, + &view->storage_image_param); } else { view->storage_surface_state = (struct anv_state){ 0 }; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 05cb77c5509..cb4f9736fdf 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -41,6 +41,7 @@ #endif #include "brw_device_info.h" +#include "brw_compiler.h" #include "util/macros.h" #include "util/list.h" @@ -1659,6 +1660,8 @@ struct anv_image_view { /** RENDER_SURFACE_STATE when using image as a storage image. 
*/ struct anv_state storage_surface_state; + + struct brw_image_param storage_image_param; }; struct anv_image_create_info { @@ -1717,6 +1720,8 @@ struct anv_buffer_view { struct anv_state surface_state; struct anv_state storage_surface_state; + + struct brw_image_param storage_image_param; }; const struct anv_format * -- cgit v1.2.3 From 4b34f2ccb8b97aaf46b2dadb8098463969064753 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 24 Feb 2016 12:50:31 -0800 Subject: anv/image: Use isl for filling brw_image_param --- src/intel/vulkan/anv_image.c | 117 +++---------------------------------------- 1 file changed, 6 insertions(+), 111 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 72f29f1dba8..46cf2413468 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -630,8 +630,9 @@ anv_image_view_init(struct anv_image_view *iview, iview->bo->size - iview->offset, 1); } - anv_image_view_fill_image_param(device, iview, - &iview->storage_image_param); + isl_surf_fill_image_param(&device->isl_dev, + &iview->storage_image_param, + &surface->isl, &isl_view); if (!device->info.has_llc) anv_state_clflush(iview->storage_surface_state); @@ -737,8 +738,9 @@ anv_CreateBufferView(VkDevice _device, (storage_format == ISL_FORMAT_RAW ? 1 : format->isl_layout->bs)); - anv_buffer_view_fill_image_param(device, view, - &view->storage_image_param); + isl_buffer_fill_image_param(&device->isl_dev, + &view->storage_image_param, + view->format, view->range); } else { view->storage_surface_state = (struct anv_state){ 0 }; } @@ -817,110 +819,3 @@ anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlag return NULL; } } - -static void -image_param_defaults(struct brw_image_param *param) -{ - memset(param, 0, sizeof *param); - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. - */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; -} - -void -anv_image_view_fill_image_param(struct anv_device *device, - struct anv_image_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - const struct isl_surf *surf = &view->image->color_surface.isl; - const int cpp = isl_format_get_layout(surf->format)->bs; - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - param->size[0] = view->extent.width; - param->size[1] = view->extent.height; - if (surf->dim == ISL_SURF_DIM_3D) { - param->size[2] = view->extent.depth; - } else { - param->size[2] = surf->logical_level0_px.array_len - view->base_layer; - } - - isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, - ¶m->offset[0], ¶m->offset[1]); - - param->stride[0] = cpp; - param->stride[1] = surf->row_pitch / cpp; - - if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { - param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); - param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); - } else { - param->stride[2] = 0; - param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); - } - - switch (surf->tiling) { - case ISL_TILING_LINEAR: - /* image_param_defaults is good enough */ - break; - - case ISL_TILING_X: - /* An X tile is a rectangular block of 512x8 bytes. 
*/ - param->tiling[0] = util_logbase2(512 / cpp); - param->tiling[1] = util_logbase2(8); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shifts required to swizzle bits 9 and 10 of the memory - * address with bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 4; - } - break; - - case ISL_TILING_Y0: - /* The layout of a Y-tiled surface in memory isn't really fundamentally - * different to the layout of an X-tiled surface, we simply pretend that - * the surface is broken up in a number of smaller 16Bx32 tiles, each - * one arranged in X-major order just like is the case for X-tiling. - */ - param->tiling[0] = util_logbase2(16 / cpp); - param->tiling[1] = util_logbase2(32); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shift required to swizzle bit 9 of the memory address with - * bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 0xff; - } - break; - - default: - assert(!"Unhandled storage image tiling"); - } - - /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The - * address calculation algorithm (emit_address_calculation() in - * brw_fs_surface_builder.cpp) handles this as a sort of tiling with - * modulus equal to the LOD. - */ - param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? - view->base_mip : 0); -} - -void -anv_buffer_view_fill_image_param(struct anv_device *device, - struct anv_buffer_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - param->stride[0] = isl_format_layouts[view->format].bs; - param->size[0] = view->range / param->stride[0]; -} -- cgit v1.2.3 From ad50896c8769adcf141619774f8c156a2bcf920a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 Feb 2016 09:26:04 -0800 Subject: anv/gen7: Only try to get the depth format if the surface has depth --- src/intel/vulkan/gen7_cmd_buffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 7377487cf7e..9681f22dc3d 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -458,7 +458,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; - const uint32_t depth_format = image ? + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + const bool has_depth = iview && anv_format->has_depth; + const uint32_t depth_format = has_depth ? isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, &image->depth_surface.isl) : D16_UNORM; -- cgit v1.2.3 From e18a2f037a074788ee3cf6cb00697b5b0152fe29 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 Feb 2016 09:43:14 -0800 Subject: anv/gen7: Set MaximumNumberofThreads in the dummy PS packet --- src/intel/vulkan/gen7_pipeline.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 009a79ac815..2167f296b2f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -338,7 +338,11 @@ genX(graphics_pipeline_create)( .PointRasterizationRule = RASTRULE_UPPER_RIGHT); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + /* Even if no fragments are ever dispatched, the hardware hangs if we + * don't at least set the maximum number of threads.
+ */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .MaximumNumberofThreads = device->info.max_wm_threads - 1); } else { const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; -- cgit v1.2.3 From 46b7c242da7c7c9ea7877a2c4b1fecdf5c1c0452 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 27 Feb 2016 09:46:40 -0800 Subject: anv/gen7: Clean up the dummy PS case Fix whitespace and remove dead comments --- src/intel/vulkan/gen7_pipeline.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 2167f296b2f..7151e36f17d 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -323,27 +323,21 @@ genX(graphics_pipeline_create)( } if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_finishme("disabling ps"); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE)); - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE)); - - /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), - .StatisticsEnable = true, - .ThreadDispatchEnable = false, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT); - - - /* Even if no fragments are ever dispatched, the hardware hangs if we - * don't at least set the maximum number of threads. - */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .MaximumNumberofThreads = device->info.max_wm_threads - 1); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), + .StatisticsEnable = true, + .ThreadDispatchEnable = false, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT); + /* Even if no fragments are ever dispatched, the hardware hangs if we + * don't at least set the maximum number of threads. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .MaximumNumberofThreads = device->info.max_wm_threads - 1); } else { const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || -- cgit v1.2.3 From 45d8ce07a5838977bd875fdeb008ccecc6eb976e Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 28 Feb 2016 10:44:08 -0800 Subject: anv/pipeline: Set stage URB size to zero if it is unused Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 1173b4f0cba..f6e3aedda40 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -897,7 +897,7 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { pipeline->urb.push_size[i] = - (pipeline->active_stages & (1 << i)) ? size_per_stage : 1; + (pipeline->active_stages & (1 << i)) ? 
size_per_stage : 0; } pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = -- cgit v1.2.3 From ef06ddb08a066a72b9a98cd2fbef8a74c99b8b32 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 28 Feb 2016 10:47:35 -0800 Subject: anv/pipeline: Set FS URB space to zero if the FS is unused Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_pipeline.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index f6e3aedda40..81d0d9c9bd9 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -894,14 +894,17 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) const unsigned stages = _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); const unsigned size_per_stage = push_constant_kb / stages; + unsigned used_kb = 0; for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { pipeline->urb.push_size[i] = (pipeline->active_stages & (1 << i)) ? size_per_stage : 0; + used_kb += pipeline->urb.push_size[i]; + assert(used_kb <= push_constant_kb); } pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = - push_constant_kb - size_per_stage * (stages - 1); + push_constant_kb - used_kb; } static void -- cgit v1.2.3 From 72efb68d48082a3da819ca47adc12733a3e8d105 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 28 Feb 2016 10:39:17 -0800 Subject: anv/pipeline: Set URB offset to zero if size is zero After 3ecd357d816dc71b2c6ebd6ace38c76ebb25674e, it may be possible for the VS to get assigned all of the URB space. On Ivy Bridge, this will cause the offset for the other stages to be 16, which cannot be packed into the ConstantBufferOffset field of 3DSTATE_PUSH_CONSTANT_ALLOC_*. Instead we can set the offset to zero if the stage size is zero. Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_pipeline_util.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index cf4e0358741..d940aba67b5 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -202,10 +202,11 @@ emit_urb_setup(struct anv_pipeline *pipeline) unsigned push_start = 0; for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { + unsigned push_size = pipeline->urb.push_size[i]; anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), ._3DCommandSubOpcode = 18 + i, - .ConstantBufferOffset = push_start, - .ConstantBufferSize = pipeline->urb.push_size[i]); + .ConstantBufferOffset = (push_size > 0) ? push_start : 0, + .ConstantBufferSize = push_size); push_start += pipeline->urb.push_size[i]; } -- cgit v1.2.3 From 1af5dacd76afe410374d442f4e0cd50820103fe8 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 29 Jan 2016 15:31:30 -0800 Subject: anv/gen7: Enable SLM in L3 cache control register Port 1983003 to gen7. 
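The reconfiguration sequence this patch adds follows a fixed pattern: stall and flush the pipeline twice, then rewrite the L3 control registers with MI_LOAD_REGISTER_IMM. A condensed C sketch of that pattern, reusing the entry points from the diff below (the helper name set_l3_partitioning and the bare register arguments are illustrative only; in the patch the logic lives inline in config_l3(), which also caches the last value written so redundant reprogramming is skipped):

static void
set_l3_partitioning(struct anv_cmd_buffer *cmd_buffer,
                    uint32_t l3cntlreg2, uint32_t l3cntlreg3)
{
   /* First PIPE_CONTROL: stall the pipeline and start invalidating the
    * caches that live in L3.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .TextureCacheInvalidationEnable = true,
                  .ConstantCacheInvalidationEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DCFlushEnable = true,
                  .PostSyncOperation = NoWrite,
                  .CommandStreamerStallEnable = true);

   /* Second PIPE_CONTROL: guarantees the invalidation is complete before
    * the partitioning registers actually change.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .DCFlushEnable = true,
                  .PostSyncOperation = NoWrite,
                  .CommandStreamerStallEnable = true);

   emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3cntlreg2);
   emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, l3cntlreg3);
}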
Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 9681f22dc3d..26339bbf0d9 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -332,6 +332,65 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN7_L3SQCREG1 0xb010 +#define GEN7_L3CNTLREG2 0xb020 +#define GEN7_L3CNTLREG3 0xb024 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t l3c2_val = enable_slm ? + /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */ + /*0x02040021*/0x010000a1 : + /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */ + /*0x04080040*/0x02000030; + bool changed = cmd_buffer->state.current_l3_config != l3c2_val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + anv_finishme("write GEN7_L3SQCREG1"); + emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val); + emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, + enable_slm ? 
0x00040810 : 0x00040410); + cmd_buffer->state.current_l3_config = l3c2_val; + } +} + void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -340,6 +399,9 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), .PipelineSelection = GPGPU); -- cgit v1.2.3 From 635c0e92b777aefc9f82ffebfe982f57ac4503a8 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 30 Jan 2016 00:25:16 -0800 Subject: anv: Set CURBEAllocationSize in MEDIA_VFE_STATE Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_pipeline.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 41a5d0f889c..1605661f971 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -83,6 +83,27 @@ genX(compute_pipeline_create)( pipeline->use_repclear = false; const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + uint32_t group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + pipeline->cs_thread_width_max = + DIV_ROUND_UP(group_size, cs_prog_data->simd_size); + uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size); + + const uint32_t vfe_curbe_allocation = + push_constant_regs * pipeline->cs_thread_width_max; anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], @@ -100,19 +121,7 @@ genX(compute_pipeline_create)( .BypassGatewayControl = true, #endif .URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - + .CURBEAllocationSize = vfe_curbe_allocation); *pPipeline = anv_pipeline_to_handle(pipeline); -- cgit v1.2.3 From 9d8bae613779e0cc7382c9252ccd7f5e7cd5cada Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Mon, 29 Feb 2016 10:55:39 -0800 Subject: anv: Don't advertise pipelineStatisticsQuery We don't support that just yet. 
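This only changes what vkGetPhysicalDeviceFeatures() reports; a conforming application must check the feature bit before creating a VK_QUERY_TYPE_PIPELINE_STATISTICS query pool. A sketch of the expected application-side check (standard Vulkan 1.0 API; physical_device stands in for whatever handle the application holds):

VkPhysicalDeviceFeatures features;
vkGetPhysicalDeviceFeatures(physical_device, &features);

if (features.pipelineStatisticsQuery) {
   /* The feature may be enabled at device creation and pipeline
    * statistics query pools may be created.
    */
} else {
   /* With this patch, anv takes this path; occlusion queries are still
    * advertised (see occlusionQueryPrecise in the diff below).
    */
}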
Reported-by: Jacek Konieczny --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 857c4b1d1f4..c68280fe8d7 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -373,7 +373,7 @@ void anv_GetPhysicalDeviceFeatures( .textureCompressionASTC_LDR = true, .textureCompressionBC = true, .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, + .pipelineStatisticsQuery = false, .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, .fragmentStoresAndAtomics = true, .shaderTessellationAndGeometryPointSize = true, -- cgit v1.2.3 From 51b618285d846295ef90fa49364d39eea4843801 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 22:47:59 -0800 Subject: anv/pipeline: Use dynamic checks for max push constants The GEN_GEN macros aren't available in anv_pipeline since it only gets compiled once for the whole driver. --- src/intel/vulkan/anv_pipeline.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 81d0d9c9bd9..df265842ccc 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -783,13 +783,14 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; /* Reserve space for push constants */ -#if GEN_GEN >= 8 - unsigned push_constant_kb = 32; -#elif GEN_IS_HASWELL - unsigned push_constant_kb = pipeline->device->info.gt == 3 ? 32 : 16; -#else - unsigned push_constant_kb = 16; -#endif + unsigned push_constant_kb; + if (pipeline->device->info.gen >= 8) + push_constant_kb = 32; + else if (pipeline->device->info.is_haswell) + push_constant_kb = pipeline->device->info.gt == 3 ? 32 : 16; + else + push_constant_kb = 16; + unsigned push_constant_bytes = push_constant_kb * 1024; unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; -- cgit v1.2.3 From 6986ae35adbd83ff4f3c84946e998db488416b72 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:13:07 -0800 Subject: anv/pipeline: Avoid a division by zero --- src/intel/vulkan/anv_pipeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index df265842ccc..cbd3a21abd7 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -894,7 +894,7 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) const unsigned stages = _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); - const unsigned size_per_stage = push_constant_kb / stages; + const unsigned size_per_stage = stages ?
(push_constant_kb / stages) : 0; unsigned used_kb = 0; for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { -- cgit v1.2.3 From 9715724015b49278fa3d110221ab39e1ed00c8c2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:13:56 -0800 Subject: anv/pipeline: Follow push constant alignment restrictions on BDW+ and HSW gt3 --- src/intel/vulkan/anv_pipeline.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index cbd3a21abd7..3dab205e5cc 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -894,9 +894,16 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) const unsigned stages = _mesa_bitcount(pipeline->active_stages & VK_SHADER_STAGE_ALL_GRAPHICS); - const unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0; + unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0; unsigned used_kb = 0; + /* Broadwell+ and Haswell gt3 require that the push constant sizes be in + * units of 2KB. Incidentally, these are the same platforms that have + * 32KB worth of push constant space. + */ + if (push_constant_kb == 32) + size_per_stage &= ~1u; + for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { pipeline->urb.push_size[i] = (pipeline->active_stages & (1 << i)) ? size_per_stage : 0; -- cgit v1.2.3 From d29fd1c7cba7775298fb5a5e23d2c5026b3997af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:27:10 -0800 Subject: anv/cmd_buffer: Re-emit push constants packets for all stages --- src/intel/vulkan/gen7_cmd_buffer.c | 24 ++++++++++++------------ src/intel/vulkan/gen8_cmd_buffer.c | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 26339bbf0d9..b0456ae4c67 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -52,20 +52,20 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + } } - cmd_buffer->state.push_constants_dirty &= ~flushed; + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; return flushed; } diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 3221f5e2dc4..2e979d92760 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -52,20 +52,20 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - if (state.offset == 0) - 
continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, - .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + } } - cmd_buffer->state.push_constants_dirty &= ~flushed; + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; return flushed; } -- cgit v1.2.3 From 097564bb8e30e3c13674a2aa113c373657628eb1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 14:27:34 -0800 Subject: anv/cmd_buffer: Dirty push constants when changing pipelines. --- src/intel/vulkan/gen7_cmd_buffer.c | 11 +++++++++++ src/intel/vulkan/gen8_cmd_buffer.c | 11 +++++++++++ 2 files changed, 22 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index b0456ae4c67..d2c4297cbca 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -470,6 +470,17 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) gen7_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. + */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; } if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 2e979d92760..9dc2abd1f29 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -286,6 +286,17 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. 
+ */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; } /* We emit the binding tables and sampler tables first, then emit push -- cgit v1.2.3 From 38f4c11c2f5c00a1d2addddcd0508ad89a7cead4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Feb 2016 17:27:11 -0800 Subject: anv/pipeline: Pull 3DSTATE_SBE into a shared helper --- src/intel/vulkan/gen7_pipeline.c | 14 +---- src/intel/vulkan/gen8_pipeline.c | 99 +----------------------------- src/intel/vulkan/genX_pipeline_util.h | 109 ++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 111 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 7151e36f17d..c356fed7d68 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -245,10 +245,6 @@ genX(graphics_pipeline_create)( .SampleMask = 0xff); const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* The last geometry producing stage will set urb_offset and urb_length, - * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ - uint32_t urb_offset = 1; - uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; #if 0 /* From gen7_vs_state.c */ @@ -291,9 +287,6 @@ genX(graphics_pipeline_create)( if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); } else { - urb_offset = 1; - urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .KernelStartPointer = pipeline->gs_kernel, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], @@ -346,12 +339,7 @@ genX(graphics_pipeline_create)( if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) anv_finishme("primitive_id needs sbe swizzling setup"); - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, - .VertexURBEntryReadLength = urb_length, - .VertexURBEntryReadOffset = urb_offset, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + emit_3dstate_sbe(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index c9545c898f3..494a64949b6 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -443,104 +443,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), .PixelShaderValid = false); } else { - /* TODO: We should clean this up. Among other things, this is mostly - * shared with other gens. - */ - const struct brw_vue_map *fs_input_map; - if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &vue_prog_data->vue_map; - else - fs_input_map = &gs_prog_data->base.vue_map; - - struct GENX(3DSTATE_SBE_SWIZ) swiz = { - GENX(3DSTATE_SBE_SWIZ_header), - }; - - int max_source_attr = 0; - for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = wm_prog_data->urb_setup[attr]; - - if (input_index < 0) - continue; - - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); - - if (input_index >= 16) - continue; - - if (source_attr == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. 
It could be that it's a - * regular varying read by the fragment shader but not written by - * the vertex shader or it's gl_PrimitiveID. In the first case the - * value is undefined, in the second it needs to be - * gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - } else { - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. - */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; - } - } - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .AttributeSwizzleEnable = true, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .VertexURBEntryReadLength = - DIV_ROUND_UP(max_source_attr + 1, 2), - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - wm_prog_data->num_varying_inputs, - -#if GEN_GEN >= 9 - .Attribute0ActiveComponentFormat = ACF_XYZW, - .Attribute1ActiveComponentFormat = ACF_XYZW, - .Attribute2ActiveComponentFormat = ACF_XYZW, - .Attribute3ActiveComponentFormat = ACF_XYZW, - .Attribute4ActiveComponentFormat = ACF_XYZW, - .Attribute5ActiveComponentFormat = ACF_XYZW, - .Attribute6ActiveComponentFormat = ACF_XYZW, - .Attribute7ActiveComponentFormat = ACF_XYZW, - .Attribute8ActiveComponentFormat = ACF_XYZW, - .Attribute9ActiveComponentFormat = ACF_XYZW, - .Attribute10ActiveComponentFormat = ACF_XYZW, - .Attribute11ActiveComponentFormat = ACF_XYZW, - .Attribute12ActiveComponentFormat = ACF_XYZW, - .Attribute13ActiveComponentFormat = ACF_XYZW, - .Attribute14ActiveComponentFormat = ACF_XYZW, - .Attribute15ActiveComponentFormat = ACF_XYZW, - /* wow, much field, very attribute */ - .Attribute16ActiveComponentFormat = ACF_XYZW, - .Attribute17ActiveComponentFormat = ACF_XYZW, - .Attribute18ActiveComponentFormat = ACF_XYZW, - .Attribute19ActiveComponentFormat = ACF_XYZW, - .Attribute20ActiveComponentFormat = ACF_XYZW, - .Attribute21ActiveComponentFormat = ACF_XYZW, - .Attribute22ActiveComponentFormat = ACF_XYZW, - .Attribute23ActiveComponentFormat = ACF_XYZW, - .Attribute24ActiveComponentFormat = ACF_XYZW, - .Attribute25ActiveComponentFormat = ACF_XYZW, - .Attribute26ActiveComponentFormat = ACF_XYZW, - .Attribute27ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute30ActiveComponentFormat = ACF_XYZW, -#endif - ); - - uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GENX(3DSTATE_SBE_SWIZ_length)); - GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + emit_3dstate_sbe(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .KernelStartPointer0 = pipeline->ps_ksp0, diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index d940aba67b5..66250e5d4d6 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -219,6 +219,115 @@ emit_urb_setup(struct anv_pipeline *pipeline) } } +static void +emit_3dstate_sbe(struct anv_pipeline *pipeline) +{ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &pipeline->vs_prog_data.base.vue_map; + else + fs_input_map = 
&pipeline->gs_prog_data.base.vue_map; + + struct GENX(3DSTATE_SBE) sbe = { + GENX(3DSTATE_SBE_header), + .AttributeSwizzleEnable = true, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + +#if GEN_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + }; + +#if GEN_GEN >= 8 + /* On Broadwell, they broke 3DSTATE_SBE into two packets */ + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; +#else +# define swiz sbe +#endif + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = pipeline->wm_prog_data.urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. 
+ */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } + } + + sbe.VertexURBEntryReadOffset = 1; /* Skip the VUE header and position slots */ + sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2); + + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_length)); + GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe); + +#if GEN_GEN >= 8 + dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); +#endif +} + static inline uint32_t scratch_space(const struct brw_stage_prog_data *prog_data) { -- cgit v1.2.3 From 22d8666d74f6fa6de53366f76a56277976eced21 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Tue, 1 Mar 2016 09:17:16 -0800 Subject: anv: Add in image->offset when setting up depth buffer Fix from Neil Roberts. https://bugs.freedesktop.org/show_bug.cgi?id=94348 --- src/intel/vulkan/genX_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 9be87a3ff05..c00c6d0decc 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -572,7 +572,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) .SurfacePitch = image->depth_surface.isl.row_pitch - 1, .SurfaceBaseAddress = { .bo = image->bo, - .offset = image->depth_surface.offset, + .offset = image->offset + image->depth_surface.offset, }, .Height = fb->height - 1, .Width = fb->width - 1, -- cgit v1.2.3 From bb08d86efe32e5d59e6dde1a062539e626727d0a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 10:56:46 -0800 Subject: anv/cmd_buffer: Clean up stencil state setup on gen8 --- src/intel/vulkan/gen8_cmd_buffer.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 9dc2abd1f29..8a0fe60db33 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -355,6 +355,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) #if GEN_GEN == 8 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GENX(COLOR_CALC_STATE_length) * 4, @@ -364,10 +365,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front, + .BackFaceStencilReferenceValue = d->stencil_reference.back, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); @@ -384,23 +383,19 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { 
GENX(3DSTATE_WM_DEPTH_STENCIL_header), /* Is this what we need to do? */ - .StencilBufferWriteEnable = - cmd_buffer->state.dynamic.stencil_write_mask.front != 0, - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, }; GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw, &wm_depth_stencil); -- cgit v1.2.3 From 4cfdd1650083f3e425112ff697538e9818bc8946 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 11:02:12 -0800 Subject: anv/cmd_buffer: Clean up stencil state setup on gen7 --- src/intel/vulkan/gen7_cmd_buffer.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index d2c4297cbca..09025ef2323 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -557,6 +557,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GENX(COLOR_CALC_STATE_length) * 4, @@ -566,10 +567,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front, + .BackFaceStencilReferenceValue = d->stencil_reference.back, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) @@ -585,6 +584,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); @@ -592,15 +592,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - 
cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, }; GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil); -- cgit v1.2.3 From 6e20c1e058d7449c800506d05cd1c6431fa77a4b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 10:59:21 -0800 Subject: anv/cmd_buffer: Look at both sides for stencil enable Now it's all consistent with gen9 --- src/intel/vulkan/gen7_cmd_buffer.c | 6 ++---- src/intel/vulkan/gen8_cmd_buffer.c | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 09025ef2323..3ed93137f6a 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -586,11 +586,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { - .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8a0fe60db33..884152da207 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -388,8 +388,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = { GENX(3DSTATE_WM_DEPTH_STENCIL_header), - /* Is this what we need to do? */ - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, -- cgit v1.2.3 From 8b091deb5e229dd67c7b9c72d511d3eaa7c9b7d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 11:38:11 -0800 Subject: anv: Unify gen7 and gen8 state Now that we've pulled surface state setup into ISL, there's not much to do here. 
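The unification leans on the per-generation build scheme visible in the Makefile hunk below: the same genX_state.c is compiled once for each gen with a different GEN_VERSIONx10 value, and the GENX() macro pastes the matching prefix onto every packed-struct and function name. A minimal sketch of the mechanism (the real definitions live in genxml/gen_macros.h and are more involved; the macro bodies here are illustrative only):

#if GEN_VERSIONx10 == 70
#  define GENX(X) GEN7_##X
#elif GEN_VERSIONx10 == 75
#  define GENX(X) GEN75_##X
#elif GEN_VERSIONx10 == 80
#  define GENX(X) GEN8_##X
#elif GEN_VERSIONx10 == 90
#  define GENX(X) GEN9_##X
#endif

/* Under this scheme, GENX(SAMPLER_STATE) in genX_state.c expands to
 * GEN7_SAMPLER_STATE, GEN75_SAMPLER_STATE, GEN8_SAMPLER_STATE, or
 * GEN9_SAMPLER_STATE depending on which per-gen library the file is
 * compiled into, so one source file serves all four generations, with
 * #if GEN_GEN guards covering the places where the packets differ. */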
--- src/intel/vulkan/Makefile.am | 8 +- src/intel/vulkan/gen7_state.c | 125 ------------------ src/intel/vulkan/gen8_state.c | 203 ---------------------------- src/intel/vulkan/genX_state.c | 263 +++++++++++++++++++++++++++++++++++++ src/intel/vulkan/genX_state_util.h | 65 --------- 5 files changed, 267 insertions(+), 397 deletions(-) delete mode 100644 src/intel/vulkan/gen7_state.c delete mode 100644 src/intel/vulkan/gen8_state.c create mode 100644 src/intel/vulkan/genX_state.c delete mode 100644 src/intel/vulkan/genX_state_util.h (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 6be4f9fb427..7d078cff91c 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -107,7 +107,7 @@ libanv_gen7_la_SOURCES = \ genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ - gen7_state.c + genX_state.c libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=70 libanv_gen75_la_SOURCES = \ @@ -115,7 +115,7 @@ libanv_gen75_la_SOURCES = \ genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ - gen7_state.c + genX_state.c libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=75 libanv_gen8_la_SOURCES = \ @@ -123,7 +123,7 @@ libanv_gen8_la_SOURCES = \ genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ - gen8_state.c + genX_state.c libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=80 libanv_gen9_la_SOURCES = \ @@ -131,7 +131,7 @@ libanv_gen9_la_SOURCES = \ genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ - gen8_state.c + genX_state.c libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=90 if HAVE_EGL_PLATFORM_WAYLAND diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c deleted file mode 100644 index 1360697f0de..00000000000 --- a/src/intel/vulkan/gen7_state.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <assert.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> -#include <fcntl.h> - -#include "anv_private.h" - -#include "genxml/gen_macros.h" - -#include "genxml/genX_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, - &GENX(MOCS)); - - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GENX(PIPELINE_SELECT), - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GENX(SAMPLER_STATE) sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampEnable = CLAMP_ENABLE_OGL, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, - pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, - pCreateInfo->anisotropyEnable), - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .BorderColorPointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, - - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c deleted file mode 100644 index 784269b98cb..00000000000 --- a/src/intel/vulkan/gen8_state.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is
hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <assert.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> -#include <fcntl.h> - -#include "anv_private.h" - -#include "genxml/gen_macros.h" - -#include "genxml/genX_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, - &GENX(MOCS)); - - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GENX(PIPELINE_SELECT), -#if GEN_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), - .ChromaKeyKillEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - - /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and - * VkPhysicalDeviceFeatures::standardSampleLocations.
- */ - anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), - ._1xSample0XOffset = 0.5, - ._1xSample0YOffset = 0.5, - ._2xSample0XOffset = 0.25, - ._2xSample0YOffset = 0.25, - ._2xSample1XOffset = 0.75, - ._2xSample1YOffset = 0.75, - ._4xSample0XOffset = 0.375, - ._4xSample0YOffset = 0.125, - ._4xSample1XOffset = 0.875, - ._4xSample1YOffset = 0.375, - ._4xSample2XOffset = 0.125, - ._4xSample2YOffset = 0.625, - ._4xSample3XOffset = 0.625, - ._4xSample3YOffset = 0.875, - ._8xSample0XOffset = 0.5625, - ._8xSample0YOffset = 0.3125, - ._8xSample1XOffset = 0.4375, - ._8xSample1YOffset = 0.6875, - ._8xSample2XOffset = 0.8125, - ._8xSample2YOffset = 0.5625, - ._8xSample3XOffset = 0.3125, - ._8xSample3YOffset = 0.1875, - ._8xSample4XOffset = 0.1875, - ._8xSample4YOffset = 0.8125, - ._8xSample5XOffset = 0.0625, - ._8xSample5YOffset = 0.4375, - ._8xSample6XOffset = 0.6875, - ._8xSample6YOffset = 0.9375, - ._8xSample7XOffset = 0.9375, - ._8xSample7YOffset = 0.0625, -#if GEN_GEN >= 9 - ._16xSample0XOffset = 0.5625, - ._16xSample0YOffset = 0.5625, - ._16xSample1XOffset = 0.4375, - ._16xSample1YOffset = 0.3125, - ._16xSample2XOffset = 0.3125, - ._16xSample2YOffset = 0.6250, - ._16xSample3XOffset = 0.7500, - ._16xSample3YOffset = 0.4375, - ._16xSample4XOffset = 0.1875, - ._16xSample4YOffset = 0.3750, - ._16xSample5XOffset = 0.6250, - ._16xSample5YOffset = 0.8125, - ._16xSample6XOffset = 0.8125, - ._16xSample6YOffset = 0.6875, - ._16xSample7XOffset = 0.6875, - ._16xSample7YOffset = 0.1875, - ._16xSample8XOffset = 0.3750, - ._16xSample8YOffset = 0.8750, - ._16xSample9XOffset = 0.5000, - ._16xSample9YOffset = 0.0625, - ._16xSample10XOffset = 0.2500, - ._16xSample10YOffset = 0.1250, - ._16xSample11XOffset = 0.1250, - ._16xSample11YOffset = 0.7500, - ._16xSample12XOffset = 0.0000, - ._16xSample12YOffset = 0.5000, - ._16xSample13XOffset = 0.9375, - ._16xSample13YOffset = 0.2500, - ._16xSample14XOffset = 0.8750, - ._16xSample14YOffset = 0.9375, - ._16xSample15XOffset = 0.0625, - ._16xSample15YOffset = 0.0000, -#endif - ); - - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - uint32_t border_color_offset = device->border_colors.offset + - pCreateInfo->borderColor * 64; - - struct GENX(SAMPLER_STATE) sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = CLAMP_MODE_OGL, -#if GEN_GEN == 8 - .BaseMipLevel = 0.0, -#endif - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), - .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), - .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = 
vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .IndirectStatePointer = border_color_offset >> 6, - - .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c new file mode 100644 index 00000000000..866f1bfd337 --- /dev/null +++ b/src/intel/vulkan/genX_state.c @@ -0,0 +1,263 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include "genxml/gen_macros.h" + +#include "genxml/genX_pack.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, + &GENX(MOCS)); + + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), +#if GEN_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS)); + anv_batch_emit(&batch, GENX(3DSTATE_TE)); + anv_batch_emit(&batch, GENX(3DSTATE_DS)); + + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + +#if GEN_GEN >= 8 + anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations.
+ */ + anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if GEN_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + ._16xSample15YOffset = 0.0000, +#endif + ); +#endif + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + + struct GENX(SAMPLER_STATE) sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + +#if GEN_GEN >= 8 + .LODPreClampMode = CLAMP_MODE_OGL, +#else + .LODPreClampEnable = CLAMP_ENABLE_OGL, +#endif + +#if GEN_GEN == 8 + .BaseMipLevel = 0.0, +#endif + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + +#if GEN_GEN >= 8 + .IndirectStatePointer = border_color_offset >> 6, +#else + .BorderColorPointer = border_color_offset >> 5, +#endif + +#if GEN_GEN >= 8 + .LODClampMagnificationMode = MIPNONE, +#endif + + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GENX(SAMPLER_STATE_pack)(NULL, 
sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h deleted file mode 100644 index aabcea9c183..00000000000 --- a/src/intel/vulkan/genX_state_util.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static inline uint32_t -vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) -{ - switch (filter) { - default: - assert(!"Invalid filter"); - case VK_FILTER_NEAREST: - return MAPFILTER_NEAREST; - case VK_FILTER_LINEAR: - return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; - } -} - -static inline uint32_t -vk_to_gen_max_anisotropy(float ratio) -{ - return (anv_clamp_f(ratio, 2, 16) - 2) / 2; -} - -static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR -}; - -static const uint32_t vk_to_gen_tex_address[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; -- cgit v1.2.3 From eecd1f80011701de6174f22106014910c9c79484 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 13:51:50 -0800 Subject: gen7/pipeline: Add competent blending This is mostly a copy-and-paste from gen8. Blending still isn't 100% but it fixes about 1100 CTS blend tests on HSW. 
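A subtlety the patch below has to work around: the hardware applies the blend factors before the blend function even for MIN and MAX, whereas Vulkan defines VK_BLEND_OP_MIN and VK_BLEND_OP_MAX as operating on the unmodified source and destination colors. A reference sketch of the mismatch, not driver code (fminf stands in for the fixed-function blend unit):

#include <math.h>

/* Per-channel result the hardware produces for MIN: the factors are
 * applied first, giving min(Fs * src, Fd * dst) rather than the
 * min(src, dst) that Vulkan specifies.  Forcing both factors to 1.0
 * makes the two agree. */
static float
hw_blend_min(float src, float dst, float src_factor, float dst_factor)
{
   return fminf(src_factor * src, dst_factor * dst);
}

This is why the new code stomps SourceBlendFactor and DestinationBlendFactor (and their alpha counterparts) to BLENDFACTOR_ONE whenever the corresponding op is MIN or MAX.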
--- src/intel/vulkan/gen7_pipeline.c | 88 ++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 43 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index c356fed7d68..7d283f18f40 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -123,50 +123,52 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline, .WriteDisableGreen = true, .WriteDisableBlue = true); } else { - /* FIXME-GEN7: All render targets share blend state settings on gen7, we - * can't implement this. - */ const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(BLEND_STATE), 64, - - .ColorBufferBlendEnable = a->blendEnable, - .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - -# if 0 - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; -# endif - - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - -# if 0 - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -# endif - ); + struct GENX(BLEND_STATE) blend = { + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. 
+ */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend.SourceBlendFactor = BLENDFACTOR_ONE; + blend.DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + + pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, + GENX(BLEND_STATE_length) * 4, + 64); + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend); + if (pipeline->device->info.has_llc) + anv_state_clflush(pipeline->blend_state); } anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), -- cgit v1.2.3 From e941fd84707d4ed04a683f8862d184956a60f9ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 14:43:05 -0800 Subject: genxml: Make the border color pointer consistent across gens --- src/intel/genxml/gen8.xml | 2 +- src/intel/genxml/gen9.xml | 2 +- src/intel/vulkan/genX_state.c | 6 +----- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 9a52fbaa330..96eda703453 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -412,7 +412,7 @@ - + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 2b73f5f2a39..79d3006d24b 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -437,7 +437,7 @@ - + diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 866f1bfd337..63ea26937e5 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -231,11 +231,7 @@ VkResult genX(CreateSampler)( .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = OVERRIDE, -#if GEN_GEN >= 8 - .IndirectStatePointer = border_color_offset >> 6, -#else - .BorderColorPointer = border_color_offset >> 5, -#endif + .BorderColorPointer = border_color_offset, #if GEN_GEN >= 8 .LODClampMagnificationMode = MIPNONE, -- cgit v1.2.3 From 5b70aa11ee136baf5aa1b2ba21f10fc42af53c88 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 17:19:43 -0800 Subject: anv/meta_blit: Use unorm formats for 8 and 16-bit RGB and RGBA values While Broadwell is very good about UINT formats, HSW is more restrictive. Neither R8G8B8_UINT nor R16G16B16_UINT really exist on HSW. It should be safe to just use the unorm formats. --- src/intel/vulkan/anv_meta_blit.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 2ec428b5f4a..96a3b7669ac 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -375,17 +375,26 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, static VkFormat vk_format_for_size(int bs) { - /* Note: We intentionally use the 4-channel formats whenever we can. - * This is so that, when we do a RGB <-> RGBX copy, the two formats will - * line up even though one of them is 3/4 the size of the other. + /* The choice of UNORM and UINT formats is very intentional here. Most of + * the time, we want to use a UINT format to avoid any rounding error in + * the blit. For stencil blits, R8_UINT is required by the hardware. + * (It's the only format allowed in conjunction with W-tiling.) Also we + * intentionally use the 4-channel formats whenever we can. 
This is so + * that, when we do a RGB <-> RGBX copy, the two formats will line up even + * though one of them is 3/4 the size of the other. The choice of UNORM + * vs. UINT is also very intentional because Haswell doesn't handle 8 or + * 16-bit RGB UINT formats at all so we have to use UNORM there. + * Fortunately, the only time we should ever use two different formats in + * the table below is for RGB -> RGBA blits and so we will never have any + * UNORM/UINT mismatch. */ switch (bs) { case 1: return VK_FORMAT_R8_UINT; case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UINT; - case 4: return VK_FORMAT_R8G8B8A8_UINT; - case 6: return VK_FORMAT_R16G16B16_UINT; - case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 3: return VK_FORMAT_R8G8B8_UNORM; + case 4: return VK_FORMAT_R8G8B8A8_UNORM; + case 6: return VK_FORMAT_R16G16B16_UNORM; + case 8: return VK_FORMAT_R16G16B16A16_UNORM; case 12: return VK_FORMAT_R32G32B32_UINT; case 16: return VK_FORMAT_R32G32B32A32_UINT; default: -- cgit v1.2.3 From 8f5a64e44f4daf2c44c35f209b2452b4b6c6e4e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Mar 2016 10:46:13 -0800 Subject: gen8/cmd_buffer: Properly return flushed push constant stages This is required on SKL so that we can properly re-emit binding table pointers commands. --- src/intel/vulkan/gen8_cmd_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 884152da207..9d4926f86cd 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -63,9 +63,11 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), }); } + + flushed |= mesa_to_vk_shader_stage(stage); } - cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; + cmd_buffer->state.push_constants_dirty &= ~flushed; return flushed; } -- cgit v1.2.3 From b0867ca4b26aa6b3e30af8d6050b94d283636cbc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Mar 2016 11:31:32 -0800 Subject: anv: Fix make check --- src/intel/vulkan/Makefile.am | 10 ++++++---- src/intel/vulkan/tests/Makefile.am | 7 ++++--- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 7d078cff91c..272db40d10b 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -65,8 +65,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src \ -I$(top_builddir)/src/compiler \ -I$(top_builddir)/src/compiler/nir \ - -I$(top_builddir)/src/intel \ - -I$(top_builddir)/src/vulkan + -I$(top_builddir)/src/intel libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init @@ -198,7 +197,10 @@ libvulkan_test_la_SOURCES = \ $(VULKAN_SOURCES) \ anv_gem_stubs.c -libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) -libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) +libvulkan_test_la_CFLAGS = \ + -I$(top_srcdir)/src/intel/vulkan \ + $(libvulkan_intel_la_CFLAGS) + +libvulkan_test_la_LIBADD = $(libvulkan_intel_la_LIBADD) include $(top_srcdir)/install-lib-links.mk diff --git a/src/intel/vulkan/tests/Makefile.am b/src/intel/vulkan/tests/Makefile.am index 883013d86c6..ddff73c1707 100644 --- a/src/intel/vulkan/tests/Makefile.am +++ b/src/intel/vulkan/tests/Makefile.am @@ -30,11 +30,12 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include \ 
- -I$(top_srcdir)/src/isl/ \ - -I$(top_srcdir)/src/vulkan + -I$(top_srcdir)/src/intel \ + -I$(top_srcdir)/src/intel/vulkan \ + -I$(top_builddir)/src/intel/vulkan LDADD = \ - $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(top_builddir)/src/intel/vulkan/libvulkan-test.la \ $(PTHREAD_LIBS) -lm -lstdc++ check_PROGRAMS = \ -- cgit v1.2.3 From da4745104cc02fc0052a2e05e37c69a4dce76eef Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 2 Mar 2016 01:09:16 -0800 Subject: anv: Save batch to local variable for indirect compute Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c00c6d0decc..dbb72b44ee2 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -507,6 +507,7 @@ void genX(CmdDispatchIndirect)( struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; + struct anv_batch *batch = &cmd_buffer->batch; if (prog_data->uses_num_work_groups) { cmd_buffer->state.num_workgroups_offset = bo_offset; @@ -515,11 +516,11 @@ void genX(CmdDispatchIndirect)( genX(cmd_buffer_flush_compute_state)(cmd_buffer); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + emit_lrm(batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + anv_batch_emit(batch, GENX(GPGPU_WALKER), .IndirectParameterEnable = true, .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, @@ -528,7 +529,7 @@ void genX(CmdDispatchIndirect)( .RightExecutionMask = pipeline->cs_right_mask, .BottomExecutionMask = 0xffffffff); - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); + anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH)); } void -- cgit v1.2.3 From 98cdce1ce4737cf09c5d9613a85bb118f0f1757b Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 2 Mar 2016 01:11:29 -0800 Subject: anv/gen7: Use predicated rendering for indirect compute For OpenGL, see commit 9a939ebb47a0d37a6b29e3dbb1b20bdc9538a721. 
Fixes: * dEQP-VK.compute.indirect_dispatch.upload_buffer.empty_command * dEQP-VK.compute.indirect_dispatch.gen_in_compute.empty_command Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index dbb72b44ee2..a888c360673 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -496,6 +496,9 @@ void genX(CmdDispatch)( #define GPGPU_DISPATCHDIMY 0x2504 #define GPGPU_DISPATCHDIMZ 0x2508 +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 + void genX(CmdDispatchIndirect)( VkCommandBuffer commandBuffer, VkBuffer _buffer, @@ -520,8 +523,50 @@ void genX(CmdDispatchIndirect)( emit_lrm(batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); emit_lrm(batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); +#if GEN_GEN <= 7 + /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ + emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(batch, MI_PREDICATE_SRC1 + 0, 0); + emit_lri(batch, MI_PREDICATE_SRC1 + 4, 0); + + /* Load compute_dispatch_indirect_x_size into SRC0 */ + emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 0); + + /* predicate = (compute_dispatch_indirect_x_size == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOAD, + .CombineOperation = COMBINE_SET, + .CompareOperation = COMPARE_SRCS_EQUAL); + + /* Load compute_dispatch_indirect_y_size into SRC0 */ + emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 4); + + /* predicate |= (compute_dispatch_indirect_y_size == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOAD, + .CombineOperation = COMBINE_OR, + .CompareOperation = COMPARE_SRCS_EQUAL); + + /* Load compute_dispatch_indirect_z_size into SRC0 */ + emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 8); + + /* predicate |= (compute_dispatch_indirect_z_size == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOAD, + .CombineOperation = COMBINE_OR, + .CompareOperation = COMPARE_SRCS_EQUAL); + + /* predicate = !predicate; */ +#define COMPARE_FALSE 1 + anv_batch_emit(batch, GENX(MI_PREDICATE), + .LoadOperation = LOAD_LOADINV, + .CombineOperation = COMBINE_OR, + .CompareOperation = COMPARE_FALSE); +#endif + anv_batch_emit(batch, GENX(GPGPU_WALKER), .IndirectParameterEnable = true, + .PredicateEnable = GEN_GEN <= 7, .SIMDSize = prog_data->simd_size / 16, .ThreadDepthCounterMaximum = 0, .ThreadHeightCounterMaximum = 0, -- cgit v1.2.3 From 206414f92edb4a2149b504f9c296f687a9572ffe Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 3 Mar 2016 08:17:36 -0800 Subject: anv/util: Fix vector resizing It wasn't properly handling the fact that wrap-around in the source may not translate to wrap-around in the destination. This really needs unit tests. 
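A minimal sketch of the kind of unit test the message asks for, assuming only what the diff below shows plus the matching pop helper (anv_vector is a power-of-two ring buffer of fixed-size elements with byte-offset head and tail; anv_vector_add and anv_vector_remove in anv_private.h push and pop in FIFO order):

#include <assert.h>
#include <stdint.h>

#include "anv_private.h"

static void
test_vector_resize_with_wraparound(void)
{
   struct anv_vector v;
   uint32_t *e;

   /* 8 elements of 4 bytes each. */
   anv_vector_init(&v, sizeof(uint32_t), 8 * sizeof(uint32_t));

   /* Push and pop a few elements so the tail moves away from offset 0
    * and the live region wraps around once the vector fills up. */
   for (uint32_t i = 0; i < 6; i++) {
      e = anv_vector_add(&v);
      *e = i;
   }
   for (uint32_t i = 0; i < 6; i++) {
      e = anv_vector_remove(&v);
      assert(e != NULL && *e == i);
   }

   /* Fill to capacity, then add one more element to force a resize
    * while the live data is split across the end of the buffer. */
   for (uint32_t i = 0; i < 9; i++) {
      e = anv_vector_add(&v);
      *e = 100 + i;
   }

   /* FIFO order must survive the resize, wrap-around included. */
   for (uint32_t i = 0; i < 9; i++) {
      e = anv_vector_remove(&v);
      assert(e != NULL && *e == 100 + i);
   }
}

Repeating this with different initial push/pop counts would cover both the src_tail == 0 fast path and the two-copy split path.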
--- src/intel/vulkan/anv_util.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index 22fd01c9495..62f47051ec7 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -144,7 +144,7 @@ anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) void * anv_vector_add(struct anv_vector *vector) { - uint32_t offset, size, split, tail; + uint32_t offset, size, split, src_tail, dst_tail; void *data; if (vector->head - vector->tail == vector->size) { @@ -152,18 +152,25 @@ anv_vector_add(struct anv_vector *vector) data = malloc(size); if (data == NULL) return NULL; - split = align_u32(vector->tail, vector->size); - tail = vector->tail & (vector->size - 1); - if (vector->head - split < vector->size) { - memcpy(data + tail, - vector->data + tail, - split - vector->tail); - memcpy(data + vector->size, - vector->data, vector->head - split); + src_tail = vector->tail & (vector->size - 1); + dst_tail = vector->tail & (size - 1); + if (src_tail == 0) { + /* Since we know that the vector is full, this means that it's + * linear from start to end so we can do one copy. + */ + memcpy(data + dst_tail, vector->data, vector->size); } else { - memcpy(data + tail, - vector->data + tail, - vector->head - vector->tail); + /* In this case, the vector is split into two pieces and we have + * to do two copies. We have to be careful to make sure each + * piece goes to the right locations. Thanks to the change in + * size, it may or may not still wrap around. + */ + split = align_u32(vector->tail, vector->size); + assert(vector->tail <= split && split < vector->head); + memcpy(data + dst_tail, vector->data + src_tail, + split - vector->tail); + memcpy(data + (split & (size - 1)), vector->data, + vector->head - split); } free(vector->data); vector->data = data; -- cgit v1.2.3 From 1d9d90d9a6323c37e80b7870946597b470d8dec0 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 2 Mar 2016 09:44:48 -0800 Subject: anv/image: Create a linear image when requested If a linear image is requested, the only possible result should be a linearly-tiled surface. 
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 46cf2413468..dc1ea9c80cc 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -120,7 +120,7 @@ make_surface(const struct anv_device *dev, isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) - tiling_flags &= ISL_TILING_LINEAR_BIT; + tiling_flags = ISL_TILING_LINEAR_BIT; struct anv_surface *anv_surf = get_surface(image, aspect); -- cgit v1.2.3 From d50ff250ec25e4903ef9e82c47981aaed962e464 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 2 Mar 2016 14:27:17 -0800 Subject: anv/meta: Add missing command to exit meta in anv_CmdUpdateBuffer() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 96a3b7669ac..216a0bfa39d 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -625,6 +625,8 @@ void anv_CmdUpdateBuffer( dstOffset += copy_size; pData = (void *)pData + copy_size; } + + meta_finish_blit(cmd_buffer, &saved_state); } static VkFormat -- cgit v1.2.3 From cfe70367503ffb49a850a17e03f4c7e4138af4f1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 14:25:00 -0800 Subject: anv/meta: Replace copy_format w/ block size in do_buffer_copy() This is a preparatory commit that will simplify the future usage of this function. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 216a0bfa39d..9d41add6079 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -406,9 +406,10 @@ static void do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *src, uint64_t src_offset, struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat copy_format) + int width, int height, int bs) { VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + VkFormat copy_format = vk_format_for_size(bs); VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -534,8 +535,6 @@ void anv_CmdCopyBuffer( bs = MIN2(bs, 1 << fs); assert(pRegions[r].size % bs == 0); - VkFormat copy_format = vk_format_for_size(bs); - /* This is maximum possible width/height our HW can handle */ uint64_t max_surface_dim = 1 << 14; @@ -544,7 +543,7 @@ void anv_CmdCopyBuffer( while (copy_size >= max_copy_size) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, copy_format); + max_surface_dim, max_surface_dim, bs); copy_size -= max_copy_size; src_offset += max_copy_size; dest_offset += max_copy_size; @@ -556,7 +555,7 @@ void anv_CmdCopyBuffer( uint64_t rect_copy_size = height * max_surface_dim * bs; do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - max_surface_dim, height, copy_format); + max_surface_dim, height, bs); copy_size -= rect_copy_size; src_offset += rect_copy_size; dest_offset += rect_copy_size; @@ -565,7 +564,7 @@ void anv_CmdCopyBuffer( if 
(copy_size != 0) { do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, dest_buffer->bo, dest_offset, - copy_size / bs, 1, copy_format); + copy_size / bs, 1, bs); } } @@ -601,17 +600,13 @@ void anv_CmdUpdateBuffer( memcpy(tmp_data.map, pData, copy_size); - VkFormat format; int bs; if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; bs = 16; } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - format = VK_FORMAT_R32G32_UINT; bs = 8; } else { assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; bs = 4; } @@ -619,7 +614,7 @@ void anv_CmdUpdateBuffer( &cmd_buffer->device->dynamic_state_block_pool.bo, tmp_data.offset, dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, format); + copy_size / bs, 1, bs); dataSize -= copy_size; dstOffset += copy_size; -- cgit v1.2.3 From d1e48b994565c1d6c1cfa546ba7ab09145c12601 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 13:29:04 -0800 Subject: anv/meta: Remove redundancies in do_buffer_copy() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 46 ++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 28 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 9d41add6079..478b1997172 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -446,39 +446,29 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(dest_image)->bo = dest; anv_image_from_handle(dest_image)->offset = dest_offset; + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 0, /* TEMPLATE */ + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }; + struct anv_image_view src_iview; + iview_info.image = src_image; anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); + &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; + iview_info.image = dest_image; anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), -- cgit v1.2.3 From 654f79a04512502df96d9e6ce99ac0f95516d193 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Dec 2015 15:37:54 -0800 Subject: anv/meta: Add the beginnings of a blitter API This API is designed to be an abstraction that sits between the VkCmdCopy commands and the hardware. 
The idea is that it is simple enough that it *should* be implementable using the blitter but with enough extra data that we can implement it with the 3-D pipeline efficiently. One design objective is to allow the user to supply enough information that we can handle most blit operations with a single draw call even if they require copying multiple rectangles. --- src/intel/vulkan/anv_meta.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index d33e9e6d8ba..f5dac12a04a 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -70,6 +70,54 @@ anv_meta_get_iview_layer(const struct anv_image *dest_image, const VkImageSubresourceLayers *dest_subresource, const VkOffset3D *dest_offset); +struct anv_meta_blit2d_surf { + struct anv_bo *bo; + enum isl_tiling tiling; + + /** Base offset to the start of the image */ + uint64_t base_offset; + + uint32_t offset_x; + uint32_t offset_y; + + /** The size of a unit in bytes. (Usually texel size) */ + uint8_t units; + + /** Stride between rows in bytes. */ + uint32_t stride; + + /** Possible vertical stride in rows. + * + * This is a hint to the blit engine that tells it that it can, if it + * wants, split the surface into v_stride tall chunks. The user makes + * the guarantee that no rectangles it passes in will ever cross a + * v_stride boundary. A v_stride value of 0 indicates that the user + * cannot make such a guarantee. + */ + uint32_t v_stride; +}; + +struct anv_meta_blit2d_rect { + uint32_t src_x, src_y; + uint32_t dst_x, dst_y; + uint32_t width, height; +}; + +static void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save); + +static void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects); + +static void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 032bf172b48211af8fc892747dc4600fb6595f99 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 25 Feb 2016 15:21:12 -0800 Subject: anv/meta: Modify blitter API fields Some fields are unnecessary. The variables "pitch" and "bs" are used for consistency with ISL. v2: Keep pitch in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index f5dac12a04a..952176453e3 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -77,24 +77,11 @@ struct anv_meta_blit2d_surf { /** Base offset to the start of the image */ uint64_t base_offset; - uint32_t offset_x; - uint32_t offset_y; - - /** The size of a unit in bytes. (Usually texel size) */ - uint8_t units; - - /** Stride between rows in bytes. */ - uint32_t stride; - - /** Possible vertical stride in rows. - * - * This is a hint to the blit engine that tells it that it can, if it - * wants, split the surface into v_stride tall chunks. The user makes - * the guarantee that no rectangles it passes in will ever cross a - * v_stride boundary. A v_stride value of 0 indicates that the user - * cannot make such a guarantee. - */ - uint32_t v_stride; + /** The size of an element in bytes.
*/ + uint8_t bs; + + /** Pitch between rows in bytes. */ + uint32_t pitch; }; struct anv_meta_blit2d_rect { -- cgit v1.2.3 From 2e9b08b9b89c0cf10cc7ca73fd39380766943283 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 12:26:21 -0800 Subject: anv/meta: Implement the blitter API functions Most of the code in anv_meta_blit2d() is borrowed from do_buffer_copy(). Create an image and image view for each rectangle. Note: For tiled RGB images, ISL will align the image's row_pitch up to the nearest tile width. v2 (Jason): Keep pitch in units of bytes Make src_format and dst_format variables s/dest/dst/ in every usage v3: Fix dst_image width Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 6 +- src/intel/vulkan/anv_meta_blit.c | 142 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 952176453e3..587c044fa5f 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -90,18 +90,18 @@ struct anv_meta_blit2d_rect { uint32_t width, height; }; -static void +void anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); -static void +void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, struct anv_meta_blit2d_surf *dst, unsigned num_rects, struct anv_meta_blit2d_rect *rects); -static void +void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 478b1997172..bef66751a7f 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -119,6 +119,14 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } +void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + meta_prepare_blit(cmd_buffer, save); +} + + /* Returns the user-provided VkBufferImageCopy::imageOffset in units of * elements rather than texels. One element equals one texel or one block * if Image is uncompressed or compressed, respectively. 
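
The element-versus-texel distinction in the comment above is what makes the API work for block-compressed formats: offsets and extents handed to the blitter must first be scaled down by the block footprint. A standalone sketch of that conversion (the block-dimension parameter stands in for isl's format-layout fields; it is an illustration, not code quoted from the patch):

#include <stdint.h>

/* One element is one texel for uncompressed formats and one block for
 * compressed ones.  Extents round up so a partial trailing block is
 * still covered; offsets are expected to be block-aligned.
 */
static inline uint32_t texels_to_el(uint32_t texels, uint32_t block_dim)
{
   return (texels + block_dim - 1) / block_dim;
}

int main(void)
{
   /* A 10x10-texel region of a 4x4-block-compressed format maps to a
    * 3x3-element region.
    */
   return texels_to_el(10, 4) == 3 ? 0 : 1;
}
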
@@ -372,6 +380,13 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, anv_meta_restore(saved_state, cmd_buffer); } +void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + meta_finish_blit(cmd_buffer, save); +} + static VkFormat vk_format_for_size(int bs) { @@ -402,6 +417,133 @@ vk_format_for_size(int bs) } } +void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + VkFormat src_format = vk_format_for_size(src->bs); + VkFormat dst_format = vk_format_for_size(dst->bs); + + for (unsigned r = 0; r < num_rects; ++r) { + + /* Create VkImages */ + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = 0, /* TEMPLATE */ + .extent = { + .width = 0, /* TEMPLATE */ + /* Pad to highest tile height to compensate for a vertical intratile offset */ + .height = MIN(rects[r].height + 64, 1 << 14), + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = 0, /* TEMPLATE */ + .usage = 0, /* TEMPLATE */ + }; + struct anv_image_create_info anv_image_info = { + .vk_info = &image_info, + .isl_tiling_flags = 0, /* TEMPLATE */ + }; + + anv_image_info.isl_tiling_flags = 1 << src->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.format = src_format, + image_info.extent.width = src->pitch / src->bs; + VkImage src_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &src_image); + + anv_image_info.isl_tiling_flags = 1 << dst->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image_info.format = dst_format, + image_info.extent.width = dst->pitch / dst->bs; + VkImage dst_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &dst_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
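
With the three entry points implemented, a caller drives the API as begin / blit / end. A minimal sketch of that sequence, assuming driver context (cmd_buffer plus two BOs named src_bo and dst_bo, both hypothetical) is in scope:

/* Copy one 64x64-element region between two linear surfaces.
 * bs = 4 models a 4-byte element; sizes are made up.
 */
struct anv_meta_saved_state saved;
anv_meta_begin_blit2d(cmd_buffer, &saved);

struct anv_meta_blit2d_surf src = {
   .bo = src_bo,
   .tiling = ISL_TILING_LINEAR,
   .base_offset = 0,
   .bs = 4,
   .pitch = 256 * 4,        /* 256 elements per row */
};
struct anv_meta_blit2d_surf dst = src;   /* same layout, other BO */
dst.bo = dst_bo;

struct anv_meta_blit2d_rect rect = {
   .src_x = 0, .src_y = 0,
   .dst_x = 0, .dst_y = 0,
   .width = 64, .height = 64,
};
anv_meta_blit2d(cmd_buffer, &src, &dst, 1, &rect);

anv_meta_end_blit2d(cmd_buffer, &saved);

This is the same shape do_buffer_copy() takes on once it is ported to the API later in the series.
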
+ */ + anv_image_from_handle(src_image)->bo = src->bo; + anv_image_from_handle(src_image)->offset = src->base_offset; + anv_image_from_handle(dst_image)->bo = dst->bo; + anv_image_from_handle(dst_image)->offset = dst->base_offset; + + /* Create VkImageViews */ + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 0, /* TEMPLATE */ + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = 0, /* TEMPLATE */ + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }; + uint32_t img_o = 0; + + iview_info.image = src_image; + iview_info.format = src_format; + VkOffset3D src_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(src_image)-> + color_surface.isl, + rects[r].src_x, + rects[r].src_y, + &img_o, + (uint32_t*)&src_offset_el.x, + (uint32_t*)&src_offset_el.y); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_SAMPLED_BIT); + + iview_info.image = dst_image; + iview_info.format = dst_format; + VkOffset3D dst_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(dst_image)-> + color_surface.isl, + rects[r].dst_x, + rects[r].dst_y, + &img_o, + (uint32_t*)&dst_offset_el.x, + (uint32_t*)&dst_offset_el.y); + struct anv_image_view dst_iview; + anv_image_view_init(&dst_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + + /* Perform blit */ + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + src_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + anv_image_from_handle(dst_image), + &dst_iview, + dst_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); + } +} + static void do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *src, uint64_t src_offset, -- cgit v1.2.3 From 61ad78d0d1ffafc89cdc9da9d5ae710be36e3089 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 1 Mar 2016 23:15:35 -0800 Subject: anv/meta: Add function to create anv_meta_blit2d_surf from anv_image v2: Keep pitch in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index bef66751a7f..044998d0f56 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -417,6 +417,19 @@ vk_format_for_size(int bs) } } +static struct anv_meta_blit2d_surf +blit_surf_for_image(const struct anv_image* image, + const struct isl_surf *img_isl_surf) +{ + return (struct anv_meta_blit2d_surf) { + .bo = image->bo, + .tiling = img_isl_surf->tiling, + .base_offset = image->offset, + .bs = isl_format_get_layout(img_isl_surf->format)->bs, + .pitch = isl_surf_get_row_pitch(img_isl_surf), + }; +} + void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, -- cgit v1.2.3 From 91640c34c6b474903fa5634f86f87c774d16db88 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 14:28:25 -0800 Subject: anv/meta: Add function which copies between Buffers and 
Images v2: Keep pitch in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 92 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 044998d0f56..8cda3d587fa 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -1040,6 +1040,98 @@ void anv_CmdBlitImage( meta_finish_blit(cmd_buffer, &saved_state); } +static void +meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_buffer* buffer, + struct anv_image* image, + uint32_t regionCount, + const VkBufferImageCopy* pRegions, + bool forward) +{ + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(image->samples == 1); + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + + /* Start creating blit rect */ + const VkOffset3D img_offset_el = meta_region_offset_el(image, &pRegions[r].imageOffset); + const VkExtent3D bufferExtent = { + .width = pRegions[r].bufferRowLength, + .height = pRegions[r].bufferImageHeight, + }; + const VkExtent3D buf_extent_el = meta_region_extent_el(image->vk_format, &bufferExtent); + const VkExtent3D img_extent_el = meta_region_extent_el(image->vk_format, + &pRegions[r].imageExtent); + struct anv_meta_blit2d_rect rect = { + .width = MAX2(buf_extent_el.width, img_extent_el.width), + .height = MAX2(buf_extent_el.height, img_extent_el.height), + }; + + /* Create blit surfaces */ + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + const struct isl_surf *img_isl_surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + struct anv_meta_blit2d_surf img_bsurf = blit_surf_for_image(image, img_isl_surf); + struct anv_meta_blit2d_surf buf_bsurf = { + .bo = buffer->bo, + .tiling = ISL_TILING_LINEAR, + .base_offset = buffer->offset + pRegions[r].bufferOffset, + .bs = forward ? image->format->isl_layout->bs : img_bsurf.bs, + .pitch = rect.width * buf_bsurf.bs, + }; + + /* Set direction-dependent variables */ + struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf; + struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf; + uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x; + uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; + + /* Loop through each 3D or array slice */ + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(img_isl_surf, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + x_offset, + y_offset); + *x_offset += img_offset_el.x; + *y_offset += img_offset_el.y; + + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, + src_bsurf, + dst_bsurf, + 1, + &rect); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
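
Because the buffer side of the copy is modeled as a tightly packed 2-D surface, stepping to the next slice is plain arithmetic on base_offset, as the next hunk shows. A standalone illustration of that stepping (sizes are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* A 100x50-element rect with 4-byte elements: each slice advances
    * the buffer offset by width * height * bs = 20000 bytes.
    */
   uint64_t base_offset = 0;
   const uint32_t width = 100, height = 50, bs = 4;

   for (unsigned slice = 0; slice < 3; slice++) {
      printf("slice %u starts at byte %llu\n",
             slice, (unsigned long long)base_offset);
      base_offset += (uint64_t)width * height * bs;
   }
   return 0;
}
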
+ */ + buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs; + + if (image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + } + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + static struct anv_image * make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, VkImageUsageFlags usage, -- cgit v1.2.3 From 9b6c95d46ee19224e8013a0fbc991f8b2135017d Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 17:11:55 -0800 Subject: anv/meta: Use blitter API for copies between Images and Buffers Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 214 ++------------------------------------- 1 file changed, 6 insertions(+), 208 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 8cda3d587fa..8ca1871f9d8 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -1189,121 +1189,10 @@ void anv_CmdCopyBufferToImage( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, dest_image, destImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(dest_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(dest_image, aspect); - - struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, - VK_IMAGE_USAGE_SAMPLED_BIT, - dest_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, - &pRegions[r].imageOffset); - - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - uint32_t img_x = 0; - uint32_t img_y = 0; - uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = 
anv_meta_get_view_type(dest_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].imageExtent); - - meta_emit_blit(cmd_buffer, - src_image, - &src_iview, - (VkOffset3D){0, 0, 0}, - img_extent_el, - dest_image, - &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. - */ - src_image->offset += src_image->extent.width * - src_image->extent.height * - src_image->format->isl_layout->bs; - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(src_image), - &cmd_buffer->pool->alloc); - } + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - meta_finish_blit(cmd_buffer, &saved_state); + meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, + regionCount, pRegions, true); } void anv_CmdCopyImageToBuffer( @@ -1316,101 +1205,10 @@ void anv_CmdCopyImageToBuffer( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_image, src_image, srcImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - - /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." 
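
The old path being deleted here and the new helper share the same slice-walking idea: one counter pair covers both 3-D images (iterate depth) and array images (iterate layers), because Vulkan guarantees the other count is 1 — exactly what the asserts in the removed code checked. A distilled, self-contained sketch of that invariant:

#include <assert.h>
#include <stdbool.h>

static unsigned count_slices(bool is_3d, unsigned depth, unsigned layers)
{
   /* For 3-D images layerCount must be 1; for array images
    * extent.depth must be 1 -- so exactly one counter advances.
    */
   assert(is_3d ? layers == 1 : depth == 1);
   unsigned slice_3d = 0, slice_array = 0;
   while (slice_3d < depth && slice_array < layers) {
      /* ...copy one slice here... */
      if (is_3d)
         slice_3d++;
      else
         slice_array++;
   }
   return slice_3d + slice_array;
}

int main(void)
{
   return (count_slices(true, 8, 1) == 8 &&
           count_slices(false, 1, 6) == 6) ? 0 : 1;
}
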
- */ - assert(src_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(src_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, - .layerCount = pRegions[r].imageSubresource.layerCount, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - src_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.layerCount == 1); - num_slices = pRegions[r].imageExtent.depth; - } else { - assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.layerCount; - } - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].imageOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer); - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - src_offset, - pRegions[r].imageExtent, - dest_image, - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - dest_image->offset += dest_image->extent.width * - dest_image->extent.height * - src_image->format->isl_layout->bs; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); + meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, + regionCount, pRegions, false); } void -- cgit v1.2.3 From 96ff4d0679e9243d4df070cc0eaef41e1b9d742b Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 12:19:51 -0800 Subject: anv/meta: Use blitter API in anv_CmdCopyImage() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 113 +++++++++++++++------------------------ 1 file changed, 42 insertions(+), 71 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 8ca1871f9d8..2e529eed0f8 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -836,7 +836,7 @@ void anv_CmdCopyImage( */ assert(src_image->samples == dest_image->samples); - meta_prepare_blit(cmd_buffer, &saved_state); + anv_meta_begin_blit2d(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == @@ -844,84 +844,55 @@ void anv_CmdCopyImage( VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - VkFormat src_format = choose_iview_format(src_image, aspect); - VkFormat dst_format = choose_iview_format(dest_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = pRegions[r].dstSubresource.layerCount, - }, - }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); + /* Create blit surfaces */ + struct isl_surf *src_isl_surf = + &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl; + struct isl_surf *dst_isl_surf = + &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl; + struct anv_meta_blit2d_surf b_src = blit_surf_for_image(src_image, src_isl_surf); + struct anv_meta_blit2d_surf b_dst = blit_surf_for_image(dest_image, dst_isl_surf); + /* Start creating blit rect */ + const VkOffset3D dst_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); + const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); + const VkExtent3D img_extent_el = meta_region_extent_el(src_image->vk_format, + &pRegions[r].extent); + struct anv_meta_blit2d_rect rect = { + .width = img_extent_el.width, + .height = img_extent_el.height, + }; + /* Loop through each 3D or array slice */ unsigned num_slices_3d = pRegions[r].extent.depth; unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; unsigned slice_3d = 0; unsigned slice_array = 0; while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice_3d + slice_array; - - uint32_t img_x = 0; - uint32_t img_y = 0; - uint32_t img_o = 0; - if 
(isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + slice_array, - pRegions[r].dstOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dst_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].extent); + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(dst_isl_surf, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer + slice_array, + pRegions[r].dstOffset.z + slice_3d, + &rect.dst_x, + &rect.dst_y); + isl_surf_get_image_offset_el(src_isl_surf, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer + slice_array, + pRegions[r].srcOffset.z + slice_3d, + &rect.src_x, + &rect.src_y); + rect.dst_x += dst_offset_el.x; + rect.dst_y += dst_offset_el.y; + rect.src_x += src_offset_el.x; + rect.src_y += src_offset_el.y; - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - src_offset, - img_extent_el, - dest_image, &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, + &b_src, + &b_dst, + 1, + &rect); if (dest_image->type == VK_IMAGE_TYPE_3D) slice_3d++; @@ -930,7 +901,7 @@ void anv_CmdCopyImage( } } - meta_finish_blit(cmd_buffer, &saved_state); + anv_meta_end_blit2d(cmd_buffer, &saved_state); } void anv_CmdBlitImage( -- cgit v1.2.3 From 318b67d1578213b802443f06f767a1c9d6d00310 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 27 Feb 2016 17:11:55 -0800 Subject: anv/meta: Use blitter API in do_buffer_copy() v2: Keep pitch in units of bytes (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 94 +++++++++------------------------------- 1 file changed, 21 insertions(+), 73 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 2e529eed0f8..bd1c9798823 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -563,81 +563,29 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *dest, uint64_t dest_offset, int width, int height, int bs) { - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkFormat copy_format = vk_format_for_size(bs); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = 0, - .flags = 0, + struct anv_meta_blit2d_surf b_src = { + .bo = src, + .tiling = 
ISL_TILING_LINEAR, + .base_offset = src_offset, + .bs = bs, + .pitch = width * bs, }; - - VkImage src_image; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &src_image); - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - anv_image_from_handle(src_image)->bo = src; - anv_image_from_handle(src_image)->offset = src_offset; - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - VkImageViewCreateInfo iview_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 0, /* TEMPLATE */ - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, + struct anv_meta_blit2d_surf b_dst = { + .bo = dest, + .tiling = ISL_TILING_LINEAR, + .base_offset = dest_offset, + .bs = bs, + .pitch = width * bs, }; - - struct anv_image_view src_iview; - iview_info.image = src_image; - anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); - - struct anv_image_view dest_iview; - iview_info.image = dest_image; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &iview_info, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - anv_image_from_handle(dest_image), - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); + struct anv_meta_blit2d_rect rect = { + .width = width, + .height = height, + }; + anv_meta_blit2d(cmd_buffer, + &b_src, + &b_dst, + 1, + &rect); } void anv_CmdCopyBuffer( -- cgit v1.2.3 From d20f6abc85c1dc4f4a2aadd352b64502930a0541 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 2 Mar 2016 14:33:51 -0800 Subject: anv/meta: Use blitter API for state-handling in Buffer Update/Copy Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index bd1c9798823..3fc46318150 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -601,7 +601,7 @@ void anv_CmdCopyBuffer( struct anv_meta_saved_state saved_state; - meta_prepare_blit(cmd_buffer, &saved_state); + anv_meta_begin_blit2d(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; @@ -661,7 +661,7 @@ void anv_CmdCopyBuffer( } } - meta_finish_blit(cmd_buffer, &saved_state); + anv_meta_end_blit2d(cmd_buffer, &saved_state); } void anv_CmdUpdateBuffer( @@ -675,7 +675,7 @@ void anv_CmdUpdateBuffer( ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); struct anv_meta_saved_state saved_state; - meta_prepare_blit(cmd_buffer, &saved_state); + anv_meta_begin_blit2d(cmd_buffer, &saved_state); /* We can't quite grab a 
full block because the state stream needs a * little data at the top to build its linked list. @@ -714,7 +714,7 @@ void anv_CmdUpdateBuffer( pData = (void *)pData + copy_size; } - meta_finish_blit(cmd_buffer, &saved_state); + anv_meta_end_blit2d(cmd_buffer, &saved_state); } static VkFormat -- cgit v1.2.3 From 8dddc3fb1e55a7cc82c0afe2c880c1ef485d21c1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 29 Feb 2016 14:37:48 -0800 Subject: anv/meta: Delete unused functions Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 93 ---------------------------------------- 1 file changed, 93 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 3fc46318150..b8a42f99eec 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -717,52 +717,6 @@ void anv_CmdUpdateBuffer( anv_meta_end_blit2d(cmd_buffer, &saved_state); } -static VkFormat -choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - struct isl_surf *surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - - /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT - * formats for the source and destination image views. - * - * From the Vulkan spec (2015-12-30): - * - * vkCmdCopyImage performs image copies in a similar manner to a host - * memcpy. It does not perform general-purpose conversions such as - * scaling, resizing, blending, color-space conversion, or format - * conversions. Rather, it simply copies raw image data. vkCmdCopyImage - * can copy between images with different formats, provided the formats - * are compatible as defined below. - * - * [The spec later defines compatibility as having the same number of - * bytes per block]. - */ - return vk_format_for_size(isl_format_layouts[surf->format].bs); -} - -static VkFormat -choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - /* vkCmdCopy* commands behave like memcpy. Therefore we choose - * compatable UINT formats for the source and destination image views. - * - * For the buffer, we go back to the original image format and get a - * the format as if it were linear. This way, for RGB formats, we get - * an RGB format here even if the tiled image is RGBA. XXX: This doesn't - * work if the buffer is the destination. 
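
Since all of these copy paths behave like memcpy, the only property of the format that survives is its element size; vk_format_for_size() just has to return some uncompressed UINT format with the right number of bytes per texel. Its switch body is elided in the hunks above, so the following is a plausible reconstruction rather than quoted code:

#include <vulkan/vulkan.h>

/* Pick an uncompressed UINT format whose texel is 'bs' bytes wide.
 * Illustrative only -- the driver's actual table may differ.
 */
static VkFormat format_for_size(int bs)
{
   switch (bs) {
   case 1:  return VK_FORMAT_R8_UINT;
   case 2:  return VK_FORMAT_R8G8_UINT;
   case 3:  return VK_FORMAT_R8G8B8_UINT;
   case 4:  return VK_FORMAT_R8G8B8A8_UINT;
   case 6:  return VK_FORMAT_R16G16B16_UINT;
   case 8:  return VK_FORMAT_R16G16B16A16_UINT;
   case 12: return VK_FORMAT_R32G32B32_UINT;
   case 16: return VK_FORMAT_R32G32B32A32_UINT;
   default: return VK_FORMAT_UNDEFINED;
   }
}
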
- */ - enum isl_format linear_format = anv_get_isl_format(format, aspect, - VK_IMAGE_TILING_LINEAR, - NULL); - - return vk_format_for_size(isl_format_layouts[linear_format].bs); -} - void anv_CmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -1051,53 +1005,6 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, anv_meta_end_blit2d(cmd_buffer, &saved_state); } -static struct anv_image * -make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, - VkImageUsageFlags usage, - VkImageType image_type, - const VkAllocationCallbacks *alloc, - const VkBufferImageCopy *copy) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); - - VkExtent3D extent = copy->imageExtent; - if (copy->bufferRowLength) - extent.width = copy->bufferRowLength; - if (copy->bufferImageHeight) - extent.height = copy->bufferImageHeight; - extent.depth = 1; - extent = meta_region_extent_el(format, &extent); - - VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; - VkFormat buffer_format = choose_buffer_format(format, aspect); - - VkImage vk_image; - VkResult result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = buffer_format, - .extent = extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = usage, - .flags = 0, - }, alloc, &vk_image); - assert(result == VK_SUCCESS); - - ANV_FROM_HANDLE(anv_image, image, vk_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - image->bo = buffer->bo; - image->offset = buffer->offset + copy->bufferOffset; - - return image; -} - void anv_CmdCopyBufferToImage( VkCommandBuffer commandBuffer, VkBuffer srcBuffer, -- cgit v1.2.3 From 623ce595a97cc3ec47be042867e24047162cd371 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 24 Feb 2016 15:41:24 -0800 Subject: anv: Compile shader stages in pipeline order. Instead of the arbitrary order modules might be specified in. 
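
The rewrite below leans on the fact that VkShaderStageFlagBits values are single bits laid out in pipeline order, so ffs(bit) - 1 maps a stage bit straight onto Mesa's gl_shader_stage index. A quick standalone check of that correspondence (the enum here mirrors Mesa's stage ordering):

#include <strings.h>   /* ffs() */

enum { VERTEX, TESS_CTRL, TESS_EVAL, GEOMETRY, FRAGMENT, COMPUTE };

int main(void)
{
   /* VK_SHADER_STAGE_VERTEX_BIT = 0x1, GEOMETRY_BIT = 0x8,
    * FRAGMENT_BIT = 0x10; ffs() is 1-based, hence the -1.
    */
   return (ffs(0x01) - 1 == VERTEX &&
           ffs(0x08) - 1 == GEOMETRY &&
           ffs(0x10) - 1 == FRAGMENT) ? 0 : 1;
}
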
Acked-by: Jason Ekstrand --- src/intel/vulkan/anv_pipeline.c | 48 ++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 647f2eb96b0..fd6f8c92cfa 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1108,29 +1108,33 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->active_stages = 0; pipeline->total_scratch = 0; + const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, }; + struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, }; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - ANV_FROM_HANDLE(anv_shader_module, module, - pCreateInfo->pStages[i].module); - - switch (pCreateInfo->pStages[i].stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_GEOMETRY_BIT: - anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - default: - anv_finishme("Unsupported shader stage"); - } + gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1; + pStages[stage] = &pCreateInfo->pStages[i]; + modules[stage] = anv_shader_module_from_handle(pStages[stage]->module); + } + + if (modules[MESA_SHADER_VERTEX]) { + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_VERTEX], + pStages[MESA_SHADER_VERTEX]->pName, + pStages[MESA_SHADER_VERTEX]->pSpecializationInfo); + } + + if (modules[MESA_SHADER_GEOMETRY]) { + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_GEOMETRY], + pStages[MESA_SHADER_GEOMETRY]->pName, + pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo); + } + + if (modules[MESA_SHADER_FRAGMENT]) { + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, + modules[MESA_SHADER_FRAGMENT], + pStages[MESA_SHADER_FRAGMENT]->pName, + pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo); } if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { -- cgit v1.2.3 From 3ed260f54cc353398965037f12f39aafa086bcc5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 3 Mar 2016 16:21:09 -0800 Subject: hack to make dota 2 menus work --- src/intel/vulkan/genX_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a888c360673..a31ecc398e7 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -721,8 +721,8 @@ void genX(CmdBeginRenderPass)( const VkRect2D *render_area = &pRenderPassBegin->renderArea; anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMin = MAX2(render_area->offset.y, 0), + .ClippedDrawingRectangleXMin = MAX2(render_area->offset.x, 0), .ClippedDrawingRectangleYMax = render_area->offset.y + render_area->extent.height - 1, .ClippedDrawingRectangleXMax = -- cgit v1.2.3 From 9d7faadd8a319d07616d7288451562bf8b9a4be8 Mon Sep 17 00:00:00 
2001 From: Kenneth Graunke Date: Thu, 3 Mar 2016 16:43:49 -0800 Subject: anv: Fix backwards shadow comparisons sample_c is backwards from what GL and Vulkan expect. See intel_state.c in i965. v2: Drop unused vk_to_gen_compare_op. Reviewed-by: Jason Ekstrand --- src/intel/vulkan/genX_state.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 63ea26937e5..900f6dc8eec 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -173,15 +173,26 @@ static const uint32_t vk_to_gen_tex_address[] = { [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, }; -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +/* Vulkan specifies the result of shadow comparisons as: + * 1 if ref <op> texel, + * 0 otherwise. + * + * The hardware does: + * 0 if texel <op> ref, + * 1 otherwise. + * + * So, these look a bit strange because there's both a negation + * and swapping of the arguments involved. + */ +static const uint32_t vk_to_gen_shadow_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPALWAYS, + [VK_COMPARE_OP_LESS] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLESS, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGREATER, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPNEVER, }; VkResult genX(CreateSampler)( @@ -228,7 +239,7 @@ VkResult genX(CreateSampler)( .ChromaKeyEnable = 0, .ChromaKeyIndex = 0, .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .ShadowFunction = vk_to_gen_shadow_compare_op[pCreateInfo->compareOp], .CubeSurfaceControlMode = OVERRIDE, .BorderColorPointer = border_color_offset, -- cgit v1.2.3 From fa8539dd6b1fd9bb1fc7f2f79889aacf14231d13 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 00:01:25 -0800 Subject: anv/pipeline: Respect pRasterizationState->depthBiasEnable --- src/intel/vulkan/gen7_cmd_buffer.c | 6 ------ src/intel/vulkan/gen7_pipeline.c | 3 +++ src/intel/vulkan/gen8_cmd_buffer.c | 6 ------ src/intel/vulkan/gen8_pipeline.c | 3 +++ 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 3ed93137f6a..1713cc17836 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -525,9 +525,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ?
iview->image : NULL; @@ -543,9 +540,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) GENX(3DSTATE_SF_header), .DepthBufferSurfaceFormat = depth_format, .LineWidth = cmd_buffer->state.dynamic.line_width, - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 7d283f18f40..c114bfef3ac 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -67,6 +67,9 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* uint32_t VertexSubPixelPrecisionSelect; */ .UsePointWidthState = !pipeline->writes_point_size, .PointWidth = 1.0, + .GlobalDepthOffsetConstant = info->depthBiasEnable, + .GlobalDepthOffsetScale = info->depthBiasEnable, + .GlobalDepthOffsetClamp = info->depthBiasEnable, }; GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 9d4926f86cd..d506cf48b0d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -331,15 +331,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; struct GENX(3DSTATE_RASTER) raster = { GENX(3DSTATE_RASTER_header), - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 494a64949b6..a464006566b 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -90,6 +90,9 @@ emit_rs_state(struct anv_pipeline *pipeline, .ViewportZFarClipTestEnable = true, .ViewportZNearClipTestEnable = true, #endif + .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, + .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, + .GlobalDepthOffsetEnablePoint = info->depthBiasEnable, }; GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); -- cgit v1.2.3 From a8afd296537a0e61fd12c817c3003309346a5e75 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 00:02:18 -0800 Subject: anv/pipeline: Use the right provoking vertex for triangle fans --- src/intel/vulkan/gen7_pipeline.c | 4 ++-- src/intel/vulkan/gen8_pipeline.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index c114bfef3ac..22a892bba3a 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -61,7 +61,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 
1, /* uint32_t AALineDistanceMode; */ /* uint32_t VertexSubPixelPrecisionSelect; */ @@ -230,7 +230,7 @@ genX(graphics_pipeline_create)( .ClipMode = CLIPMODE_NORMAL, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, .MinimumPointWidth = 0.125, .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index a464006566b..52629a73342 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -59,7 +59,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .ViewportTransformEnable = !(extra && extra->disable_viewport), .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, .PointWidthSource = pipeline->writes_point_size ? Vertex : State, .PointWidth = 1.0, }; -- cgit v1.2.3 From fcd8e571851c18a259fdc4ccb34f6ba23f3d29ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 1 Mar 2016 13:39:04 -0800 Subject: anv/pipeline: More competent gen8 clipping --- src/intel/vulkan/gen8_pipeline.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 52629a73342..ecb8f6d7b09 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -326,7 +326,21 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .ClipEnable = true, + .EarlyCullEnable = true, + .APIMode = 1, /* D3D */ .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + + .ClipMode = + pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? + REJECT_ALL : NORMAL, + + .NonPerspectiveBarycentricEnable = + (pipeline->wm_prog_data.barycentric_interp_modes & 0x38) != 0, + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 1, + .MinimumPointWidth = 0.125, .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); -- cgit v1.2.3 From ec18fef88d8a7a1a3541b0d40708a6637412f50e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 10:45:24 -0800 Subject: anv/pipeline: Set StencilBufferWriteEnable from the pipeline The hardware docs say that StencilBufferWriteEnable should only be set if StencilTestEnable is set. It seems reasonable to set them together. 
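
The shadow-comparison remapping a few patches back is easiest to verify with a truth table: the hardware reports 0 when "texel <op> ref" holds, so the driver must program the negated operator with swapped arguments. A standalone check for the VK_COMPARE_OP_LESS -> PREFILTEROPLEQUAL entry:

#include <stdbool.h>

int main(void)
{
   /* Vulkan LESS:  result = (ref < texel)
    * Hardware:     result = !(texel OP ref)
    * With OP = LEQUAL the two agree for every input.
    */
   for (int ref = 0; ref < 4; ref++) {
      for (int texel = 0; texel < 4; texel++) {
         bool vulkan = ref < texel;
         bool hw = !(texel <= ref);   /* PREFILTEROPLEQUAL */
         if (vulkan != hw)
            return 1;
      }
   }
   return 0;
}
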
--- src/intel/vulkan/gen7_cmd_buffer.c | 3 --- src/intel/vulkan/gen7_pipeline.c | 1 + src/intel/vulkan/gen8_cmd_buffer.c | 3 --- src/intel/vulkan/gen8_pipeline.c | 1 + 4 files changed, 2 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 1713cc17836..71010583129 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -581,9 +581,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 22a892bba3a..d563a8c26cd 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -95,6 +95,7 @@ gen7_emit_ds_state(struct anv_pipeline *pipeline, .DoubleSidedStencilEnable = true, .StencilTestEnable = info->stencilTestEnable, + .StencilBufferWriteEnable = info->stencilTestEnable, .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index d506cf48b0d..8e7a078d84b 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -384,9 +384,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { GENX(3DSTATE_WM_DEPTH_STENCIL_header), - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index ecb8f6d7b09..e8a067851cc 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -227,6 +227,7 @@ emit_ds_state(struct anv_pipeline *pipeline, .DoubleSidedStencilEnable = true, .StencilTestEnable = info->stencilTestEnable, + .StencilBufferWriteEnable = info->stencilTestEnable, .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], -- cgit v1.2.3 From d61dcec64dc66fea7f15f296212c68f18fe5aaa0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 11:07:27 -0800 Subject: anv/clear: Pull the stencil write mask from the pipeline The stencil write mask wasn't getting set at all so we were using whatever write mask happened to be left over by the application.
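
The stencil-reference masking patch that follows shortly exists for a simple reason: the Vulkan-side reference is a 32-bit value while the packet field (like the stencil buffer itself) is 8 bits wide, so out-of-range references must be truncated deterministically. In isolation:

#include <stdint.h>

int main(void)
{
   /* An application-supplied reference of 0x1FF must land in the
    * 8-bit hardware field as 0xFF rather than whatever the packing
    * code makes of the high bits.
    */
   uint32_t app_reference = 0x1FF;
   uint8_t packed = app_reference & 0xff;
   return packed == 0xFF ? 0 : 1;
}
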
--- src/intel/vulkan/anv_meta_clear.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 227f8f35115..c052b8b11f6 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -45,7 +45,8 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, anv_meta_save(saved_state, cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT) | (1 << VK_DYNAMIC_STATE_SCISSOR) | - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) | + (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)); cmd_buffer->state.dynamic.viewport.count = 0; cmd_buffer->state.dynamic.scissor.count = 0; @@ -193,6 +194,7 @@ create_pipeline(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .dynamicStateCount = 9, .pDynamicStates = (VkDynamicState[]) { + /* Everything except stencil write mask */ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, @@ -200,7 +202,6 @@ create_pipeline(struct anv_device *device, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }, }, -- cgit v1.2.3 From f374765ce685d30b4eabe8085dc3daa95a75f8f8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 12:22:32 -0800 Subject: anv/cmd_buffer: Mask stencil reference values --- src/intel/vulkan/gen7_cmd_buffer.c | 4 ++-- src/intel/vulkan/gen8_cmd_buffer.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 71010583129..985907872fa 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -561,8 +561,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = d->stencil_reference.front, - .BackFaceStencilReferenceValue = d->stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8e7a078d84b..dc0d5570904 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -361,8 +361,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = d->stencil_reference.front, - .BackFaceStencilReferenceValue = d->stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); @@ -437,8 +437,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, .BackfaceStencilWriteMask = d->stencil_write_mask.back & 
0xff, - .StencilReferenceValue = d->stencil_reference.front, - .BackfaceStencilReferenceValue = d->stencil_reference.back + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); -- cgit v1.2.3 From d154a5ebd68b3d1f465d6bb77e34b3ff04bc8a9f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 12:23:01 -0800 Subject: anv/cmd_buffer: Let the pipeline set StencilBufferWriteEnable on gen9 --- src/intel/vulkan/gen8_cmd_buffer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index dc0d5570904..8972a8db6fc 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -428,9 +428,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { GEN9_3DSTATE_WM_DEPTH_STENCIL_header, - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - .StencilTestMask = d->stencil_compare_mask.front & 0xff, .StencilWriteMask = d->stencil_write_mask.front & 0xff, -- cgit v1.2.3 From b80c8ebc4587a15c823b5223419eadb0ca51cdd1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 3 Mar 2016 15:40:13 -0800 Subject: isl: Get rid of isl_surf_fill_state_info::level0_extent_px This field is no longer needed. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/isl/isl.h | 9 --------- src/intel/isl/isl_surface_state.c | 8 ++++---- src/intel/vulkan/anv_image.c | 33 +++------------------------------ 3 files changed, 7 insertions(+), 43 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 5a48bce6c64..248a94d64eb 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -772,15 +772,6 @@ struct isl_surf_fill_state_info { */ uint32_t mocs; - /** - * This allows the caller to over-ride the dimensions of the surface. - * This is used at the moment for compressed surfaces to let us hack - * around the fact that we can't actually render to them. - * - * FIXME: We really need to get rid of this. It's a lie. - */ - struct isl_extent4d level0_extent_px; - /** * The clear color for this surface * diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index 1607aa6233d..fe8f07cfe30 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -257,8 +257,8 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, .SurfaceQPitch = get_qpitch(info->surf) >> 2, #endif - .Width = info->level0_extent_px.width - 1, - .Height = info->level0_extent_px.height - 1, + .Width = info->surf->logical_level0_px.width - 1, + .Height = info->surf->logical_level0_px.height - 1, .Depth = 0, /* TEMPLATE */ .SurfacePitch = info->surf->row_pitch - 1, @@ -338,7 +338,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, * If the volume texture is MIP-mapped, this field specifies the * depth of the base MIP level. */ - s.Depth = info->level0_extent_px.depth - 1; + s.Depth = info->surf->logical_level0_px.depth - 1; /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: * @@ -346,7 +346,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, * indicates the extent of the accessible 'R' coordinates minus 1 on * the LOD currently being rendered to. 
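
Note the recurring "- 1" in the surface-state programming above: RENDER_SURFACE_STATE encodes Width, Height, Depth, and RenderTargetViewExtent biased by one, so a field value of 0 denotes a one-texel extent and the full field range stays usable. A trivial encode/decode sketch of that convention:

#include <stdint.h>

static uint32_t encode_extent(uint32_t px)    { return px - 1; }
static uint32_t decode_extent(uint32_t field) { return field + 1; }

int main(void)
{
   /* 1024 px is stored as 1023; a stored 0 decodes back to 1 px. */
   return (decode_extent(encode_extent(1024)) == 1024 &&
           decode_extent(0) == 1) ? 0 : 1;
}
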
*/ - s.RenderTargetViewExtent = info->level0_extent_px.depth - 1; + s.RenderTargetViewExtent = info->surf->logical_level0_px.depth - 1; break; default: unreachable(!"bad SurfaceType"); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index dc1ea9c80cc..c76a5f6dba9 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -540,30 +540,6 @@ anv_image_view_init(struct anv_image_view *iview, }, }; - struct isl_extent4d level0_extent_px; - - if (!isl_format_is_compressed(format) && - isl_format_is_compressed(image->format->isl_format)) { - /* Scale the ImageView extent by the backing Image. This is used - * internally when an uncompressed ImageView is created on a - * compressed Image. The ImageView can therefore be used for copying - * data from a source Image to a destination Image. - */ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - - level0_extent_px.depth = anv_minify(image->extent.depth, range->baseMipLevel); - level0_extent_px.depth = DIV_ROUND_UP(level0_extent_px.depth, isl_layout->bd); - - level0_extent_px.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; - level0_extent_px.width = isl_surf_get_row_pitch_el(&surface->isl); - isl_view.base_level = 0; - isl_view.base_array_layer = 0; - } else { - level0_extent_px.width = image->extent.width; - level0_extent_px.height = image->extent.height; - level0_extent_px.depth = image->extent.depth; - } - iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width , range->baseMipLevel), .height = anv_minify(image->extent.height, range->baseMipLevel), @@ -586,8 +562,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->sampler_surface_state.map, .surf = &surface->isl, .view = &isl_view, - .mocs = device->default_mocs, - .level0_extent_px = level0_extent_px); + .mocs = device->default_mocs); if (!device->info.has_llc) anv_state_clflush(iview->sampler_surface_state); @@ -603,8 +578,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->color_rt_surface_state.map, .surf = &surface->isl, .view = &isl_view, - .mocs = device->default_mocs, - .level0_extent_px = level0_extent_px); + .mocs = device->default_mocs); if (!device->info.has_llc) anv_state_clflush(iview->color_rt_surface_state); @@ -621,8 +595,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->storage_surface_state.map, .surf = &surface->isl, .view = &isl_view, - .mocs = device->default_mocs, - .level0_extent_px = level0_extent_px); + .mocs = device->default_mocs); } else { anv_fill_buffer_surface_state(device, iview->storage_surface_state, ISL_FORMAT_RAW, -- cgit v1.2.3 From f700d16a892cbedc58a8c7850f1e75d1c127ce26 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 15:38:11 -0800 Subject: anv/cmd_buffer: Include Haswell in set_subpass --- src/intel/vulkan/anv_cmd_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 418a143b7bc..aa54bdecb0a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -965,7 +965,11 @@ anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, { switch (cmd_buffer->device->info.gen) { case 7: - gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + if (cmd_buffer->device->info.is_haswell) { + gen75_cmd_buffer_set_subpass(cmd_buffer, subpass); + } else { + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + } break; case 8: 
gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); -- cgit v1.2.3 From 653261285e1758f6fde0fb49b3fe30d6d2631077 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 12:42:03 -0800 Subject: anv/cmd_buffer: Reset the state streams when resetting the command buffer --- src/intel/vulkan/anv_cmd_buffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index aa54bdecb0a..6ff5f35bc6a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -333,6 +333,14 @@ VkResult anv_ResetCommandBuffer( anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); anv_cmd_state_reset(cmd_buffer); + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &cmd_buffer->device->surface_state_block_pool); + + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &cmd_buffer->device->dynamic_state_block_pool); + return VK_SUCCESS; } -- cgit v1.2.3 From cc57efc67abb5b81ebc2648775d8829ab27b7df8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 17:56:12 -0800 Subject: anv/pipeline: Fix depthBiasEnable on gen7 The first time I tried to fix this, I set the wrong fields. --- src/intel/vulkan/gen7_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index d563a8c26cd..5235d399ce5 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -67,9 +67,9 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* uint32_t VertexSubPixelPrecisionSelect; */ .UsePointWidthState = !pipeline->writes_point_size, .PointWidth = 1.0, - .GlobalDepthOffsetConstant = info->depthBiasEnable, - .GlobalDepthOffsetScale = info->depthBiasEnable, - .GlobalDepthOffsetClamp = info->depthBiasEnable, + .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, + .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, + .GlobalDepthOffsetEnablePoint = info->depthBiasEnable, }; GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); -- cgit v1.2.3 From c1436e80efee072f1fc3e3b4af0d5e7ad9dd3fb7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Mar 2016 19:14:48 -0800 Subject: anv/meta_clear: Set the right number of dynamic states --- src/intel/vulkan/anv_meta_clear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index c052b8b11f6..8f92a9d755c 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -192,7 +192,7 @@ create_pipeline(struct anv_device *device, * we need only restore dynamic state was vkCmdSet. 
*/ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, + .dynamicStateCount = 8, .pDynamicStates = (VkDynamicState[]) { /* Everything except stencil write mask */ VK_DYNAMIC_STATE_VIEWPORT, -- cgit v1.2.3 From 4e75f9b219f674ea79e6d521dd8a6b1ccd8b3c10 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 4 Mar 2016 20:41:05 -0800 Subject: anv: Implement VK_REMAINING_{MIP_LEVELS,ARRAY_LAYERS} v2: Subtract the baseMipLevel and baseArrayLayer (Jason) Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_image.c | 12 ++++++------ src/intel/vulkan/anv_meta_clear.c | 4 ++-- src/intel/vulkan/anv_private.h | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index c76a5f6dba9..143a08413f7 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -388,9 +388,9 @@ anv_validate_CreateImageView(VkDevice _device, assert(subresource->levelCount > 0); assert(subresource->layerCount > 0); assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); + assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels); assert(subresource->baseArrayLayer < image->array_size); - assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); + assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size); assert(pView); const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT @@ -496,10 +496,10 @@ anv_image_view_init(struct anv_image_view *iview, unreachable("bad VkImageType"); case VK_IMAGE_TYPE_1D: case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); + assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1 <= image->array_size); break; case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + range->layerCount - 1 + assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1 <= anv_minify(image->extent.depth, range->baseMipLevel)); break; } @@ -525,9 +525,9 @@ anv_image_view_init(struct anv_image_view *iview, struct isl_view isl_view = { .format = format, .base_level = range->baseMipLevel, - .levels = range->levelCount, + .levels = anv_get_levelCount(image, range), .base_array_layer = range->baseArrayLayer, - .array_len = range->layerCount, + .array_len = anv_get_layerCount(image, range), .channel_select = { remap_swizzle(pCreateInfo->components.r, VK_COMPONENT_SWIZZLE_R, swizzle), diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 8f92a9d755c..bce94460844 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -795,8 +795,8 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; - for (uint32_t l = 0; l < range->levelCount; ++l) { - for (uint32_t s = 0; s < range->layerCount; ++s) { + for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) { + for (uint32_t s = 0; s < anv_get_layerCount(image, range); ++s) { struct anv_image_view iview; anv_image_view_init(&iview, cmd_buffer->device, &(VkImageViewCreateInfo) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cb4f9736fdf..f87270466ae 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1641,6 
+1641,23 @@ struct anv_image { }; }; +static inline uint32_t +anv_get_layerCount(const struct anv_image *image, + const VkImageSubresourceRange *range) +{ + return range->layerCount == VK_REMAINING_ARRAY_LAYERS ? + image->array_size - range->baseArrayLayer : range->layerCount; +} + +static inline uint32_t +anv_get_levelCount(const struct anv_image *image, + const VkImageSubresourceRange *range) +{ + return range->levelCount == VK_REMAINING_MIP_LEVELS ? + image->levels - range->baseMipLevel : range->levelCount; +} + + struct anv_image_view { const struct anv_image *image; /**< VkImageViewCreateInfo::image */ struct anv_bo *bo; -- cgit v1.2.3 From 81f30e2f509b4fcd79376ff02363aba831918ac6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 5 Mar 2016 00:54:54 -0800 Subject: anv/hsw: Move query code to genX file for Haswell This fixes many CTS cases, but will require an update to the kernel command parser register whitelist. (The CS GPRs and TIMESTAMP registers need to be whitelisted.) Signed-off-by: Jordan Justen --- src/intel/vulkan/gen8_cmd_buffer.c | 240 ------------------------------------ src/intel/vulkan/genX_cmd_buffer.c | 244 +++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+), 240 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8972a8db6fc..8d8775fb01d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -601,246 +601,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -static void -emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .DepthStallEnable = true, - .Address = { bo, offset }); -} - -static void -emit_query_availability(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { bo, offset }, - .ImmediateData = 1); -} - -void genX(CmdBeginQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - /* Workaround: When meta uses the pipeline with the VS disabled, it seems - * that the pipelining of the depth write breaks. What we see is that - * samples from the render pass clear leaks into the first query - * immediately after the clear. Doing a pipecontrol with a post-sync - * operation and DepthStallEnable seems to work around the issue. 
- */ - if (cmd_buffer->state.need_query_wa) { - cmd_buffer->state.need_query_wa = false; - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DepthCacheFlushEnable = true, - .DepthStallEnable = true); - } - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void genX(CmdEndQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 8); - - emit_query_availability(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 16); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -#define TIMESTAMP 0x2358 - -void genX(CmdWriteTimestamp)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - uint32_t offset = query * sizeof(struct anv_query_pool_slot); - - assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); - - switch (pipelineStage) { - case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { &pool->bo, offset }); - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { &pool->bo, offset + 4 }); - break; - - default: - /* Everything else is bottom-of-pipe */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = { &pool->bo, offset }); - break; - } - - emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); -} - -#define alu_opcode(v) __gen_uint((v), 20, 31) -#define alu_operand1(v) __gen_uint((v), 10, 19) -#define alu_operand2(v) __gen_uint((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -static void -store_query_result(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) -{ - anv_batch_emit(batch, 
GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void genX(CmdCopyQueryPoolResults)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - break; - - case VK_QUERY_TYPE_TIMESTAMP: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(2), &pool->bo, slot_offset); - break; - - default: - unreachable("unhandled query type"); - } - - store_query_result(&cmd_buffer->batch, - CS_GPR(2), buffer->bo, dst_offset, flags); - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), - &pool->bo, slot_offset + 16); - if (flags & VK_QUERY_RESULT_64_BIT) - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 8, flags); - else - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 4, flags); - } - - dst_offset += destStride; - } -} - void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent _event, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a31ecc398e7..82959f3abf6 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -754,3 +754,247 @@ void genX(CmdEndRenderPass)( anv_cmd_buffer_resolve_subpass(cmd_buffer); } + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, + .Address = { bo, offset }); +} + +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + /* Workaround: When meta uses the pipeline with the VS disabled, 
it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. + */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, offset }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, offset + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = { &pool->bo, offset }); + break; + } + + emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); +} + +#if GEN_GEN > 7 || GEN_IS_HASWELL + +#define alu_opcode(v) __gen_uint((v), 20, 31) +#define alu_operand1(v) __gen_uint((v), 10, 19) +#define alu_operand2(v) __gen_uint((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + 
.MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +static void +store_query_result(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } + + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } + + dst_offset += destStride; + } +} + +#endif -- cgit v1.2.3 From 9a90176d488c00700cbd832a6a2d53a78114a21e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 08:45:01 -0800 Subject: anv/pipeline: Calculate the correct max_source_attr for 3DSTATE_SBE --- src/intel/vulkan/genX_pipeline_util.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 66250e5d4d6..28b2a032c46 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -288,13 +288,12 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) if (input_index < 0) continue; - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); + const int slot = fs_input_map->varying_to_slot[attr]; if (input_index >= 16) continue; - if (source_attr == -1) { + if (slot == -1) { /* This 
attribute does not exist in the VUE--that means that the * vertex shader did not write to it. It could be that it's a * regular varying read by the fragment shader but not written by @@ -308,10 +307,13 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) swiz.Attribute[input_index].ComponentOverrideZ = true; swiz.Attribute[input_index].ComponentOverrideW = true; } else { + assert(slot >= 2); + const int source_attr = slot - 2; + max_source_attr = MAX2(max_source_attr, source_attr); /* We have to subtract two slots to account for the URB entry output * read offset in the VS and GS stages. */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + swiz.Attribute[input_index].SourceAttribute = source_attr; } } -- cgit v1.2.3 From 7c1660aa14094e40fba9f39ce194cb6238311b65 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 09:13:44 -0800 Subject: anv: Don't allow D16_UNORM to be combined with stencil Among other things, this can cause the depth or stencil test to spuriously fail when the fragment shader uses discard. --- src/intel/vulkan/anv_formats.c | 2 +- src/intel/vulkan/genX_cmd_buffer.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index b4b52aa6053..4d279a8fb72 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -161,7 +161,7 @@ static const struct anv_format anv_formats[] = { fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_UNSUPPORTED), fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 82959f3abf6..88cc13b580a 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -646,11 +646,12 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * * The PRM is wrong, though. The width and height must be programmed to * actual framebuffer's width and height, even when neither depth buffer - * nor stencil buffer is present. + * nor stencil buffer is present. Also, D16_UNORM is not allowed to + * be combined with a stencil buffer so we use D32_FLOAT instead.
*/ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), .SurfaceType = SURFTYPE_2D, - .SurfaceFormat = D16_UNORM, + .SurfaceFormat = D32_FLOAT, .Width = fb->width - 1, .Height = fb->height - 1, .StencilWriteEnable = has_stencil); -- cgit v1.2.3 From 1afdfc3e6e022a4e5e9701b365c4f92554be999a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 09:19:01 -0800 Subject: anv/pipeline: Implement the depth compare EQUAL workaround on gen8+ --- src/intel/vulkan/gen8_pipeline.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index e8a067851cc..4a5e8674189 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -238,6 +238,14 @@ emit_ds_state(struct anv_pipeline *pipeline, .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], }; + /* From the Broadwell PRM: + * + * "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the + * Depth_Write_Enable must be set to 0." + */ + if (info->depthTestEnable && info->depthCompareOp == VK_COMPARE_OP_EQUAL) + wm_depth_stencil.DepthBufferWriteEnable = false; + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); } -- cgit v1.2.3 From ab36eae5e78a2edf4f699fc43fc9c89e90aabd07 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:10:29 -0800 Subject: anv: Remove left-over bits of sparse-descriptor code --- src/intel/vulkan/anv_pipeline.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index fd6f8c92cfa..c93b1a07246 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -366,10 +366,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Set up dynamic offsets */ anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); - char surface_usage_mask[256], sampler_usage_mask[256]; - zero(surface_usage_mask); - zero(sampler_usage_mask); - /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (pipeline->layout) anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); -- cgit v1.2.3 From 3baf8af947ab0d4e016f79ed76d0bab78c0294ef Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:18:30 -0800 Subject: anv: Remove excess whitespace --- src/intel/vulkan/anv_pipeline_cache.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index c89bb2a2ee1..c8ff7e52996 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -165,7 +165,7 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) table = malloc(byte_size); if (table == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; - + cache->table = table; cache->table_size = table_size; cache->kernel_count = 0; @@ -176,7 +176,7 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) const uint32_t offset = old_table[i]; if (offset == ~0) continue; - + struct cache_entry *entry = cache->program_stream.block_pool->map + offset; anv_pipeline_cache_add_entry(cache, entry, offset); @@ -228,7 +228,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, } pthread_mutex_unlock(&cache->mutex); - + memcpy(state.map + preamble_size, kernel, kernel_size); if (!cache->device->info.has_llc) @@ -240,14 +240,14 @@ 
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, static void anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const void *data, size_t size) -{ +{ struct anv_device *device = cache->device; uint8_t uuid[VK_UUID_SIZE]; struct { uint32_t device_id; uint8_t uuid[VK_UUID_SIZE]; } header; - + if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); @@ -259,7 +259,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const void *end = data + size; const void *p = data + sizeof(header); - + while (p < end) { /* The kernels aren't 64 byte aligned in the serialized format so * they're always right after the prog_data. @@ -327,7 +327,7 @@ VkResult anv_GetPipelineCacheData( ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); const size_t size = 4 + VK_UUID_SIZE + cache->total_size; - + if (pData == NULL) { *pDataSize = size; return VK_SUCCESS; @@ -341,10 +341,10 @@ VkResult anv_GetPipelineCacheData( void *p = pData; memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); p += sizeof(device->chipset_id); - + anv_device_get_cache_uuid(p); p += VK_UUID_SIZE; - + struct cache_entry *entry; for (uint32_t i = 0; i < cache->table_size; i++) { if (cache->table[i] == ~0) @@ -357,7 +357,7 @@ VkResult anv_GetPipelineCacheData( void *kernel = (void *) entry + align_u32(sizeof(*entry) + entry->prog_data_size, 64); - + memcpy(p, kernel, entry->kernel_size); p += entry->kernel_size; } @@ -375,7 +375,7 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct cache_entry *entry = src->program_stream.block_pool->map + src->table[i]; - + if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) continue; @@ -400,6 +400,6 @@ VkResult anv_MergePipelineCaches( anv_pipeline_cache_merge(dst, src); } - + return VK_SUCCESS; } -- cgit v1.2.3 From 26ed943eb961e3c9cb939097dbbdb5bd547e4302 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:21:17 -0800 Subject: anv: Fix shader key hashing This was copied from inline code to a helper and wasn't updated, so it ended up hashing the pointer to the key instead of the key data. --- src/intel/vulkan/anv_pipeline_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index c8ff7e52996..7e20ff74db0 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -86,7 +86,7 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, struct mesa_sha1 *ctx; ctx = _mesa_sha1_init(); - _mesa_sha1_update(ctx, &key, sizeof(key)); + _mesa_sha1_update(ctx, key, key_size); _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); /* hash in shader stage, pipeline layout? */ -- cgit v1.2.3 From cd812f086e4eda30ae4859bdfef21f06700918a9 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:39:59 -0800 Subject: anv: Use 1.0 pipeline cache header The final version of the pipeline cache header adds a few more fields.
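In Vulkan 1.0 the first 16 + VK_UUID_SIZE bytes returned by vkGetPipelineCacheData must be exactly this header: length, version, vendor ID, device ID, and pipeline cache UUID. As a rough illustration of what a consumer of the serialized blob checks before reusing it, here is a minimal standalone sketch in C; the function name and parameters are hypothetical, not part of this patch, and the expected values would come from VkPhysicalDeviceProperties (vendorID, deviceID, pipelineCacheUUID):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: reject a saved pipeline cache blob whose header
 * does not match the device we are about to replay it on. The field
 * layout mirrors struct cache_header in the diff below. */
static bool
pipeline_cache_blob_is_usable(const void *data, size_t size,
                              uint32_t expected_vendor_id,
                              uint32_t expected_device_id,
                              const uint8_t expected_uuid[VK_UUID_SIZE])
{
   struct {
      uint32_t header_size;
      uint32_t header_version;
      uint32_t vendor_id;
      uint32_t device_id;
      uint8_t  uuid[VK_UUID_SIZE];
   } header;

   if (size < sizeof(header))
      return false;
   memcpy(&header, data, sizeof(header));   /* the blob may be unaligned */

   return header.header_size >= sizeof(header) &&
          header.header_version == VK_PIPELINE_CACHE_HEADER_VERSION_ONE &&
          header.vendor_id == expected_vendor_id &&
          header.device_id == expected_device_id &&
          memcmp(header.uuid, expected_uuid, VK_UUID_SIZE) == 0;
}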
--- src/intel/vulkan/anv_pipeline_cache.c | 36 ++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 7e20ff74db0..fa41637d2c0 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -237,20 +237,31 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, return state.offset + preamble_size; } +struct cache_header { + uint32_t header_size; + uint32_t header_version; + uint32_t vendor_id; + uint32_t device_id; + uint8_t uuid[VK_UUID_SIZE]; +}; + static void anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const void *data, size_t size) { struct anv_device *device = cache->device; + struct cache_header header; uint8_t uuid[VK_UUID_SIZE]; - struct { - uint32_t device_id; - uint8_t uuid[VK_UUID_SIZE]; - } header; if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); + if (header.header_size < sizeof(header)) + return; + if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) + return; + if (header.vendor_id != 0x8086) + return; if (header.device_id != device->chipset_id) return; anv_device_get_cache_uuid(uuid); @@ -258,7 +269,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, return; const void *end = data + size; - const void *p = data + sizeof(header); + const void *p = data + header.header_size; while (p < end) { /* The kernels aren't 64 byte aligned in the serialized format so @@ -325,8 +336,9 @@ VkResult anv_GetPipelineCacheData( { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + struct cache_header *header; - const size_t size = 4 + VK_UUID_SIZE + cache->total_size; + const size_t size = sizeof(*header) + cache->total_size; if (pData == NULL) { *pDataSize = size; @@ -339,11 +351,13 @@ VkResult anv_GetPipelineCacheData( } void *p = pData; - memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); - p += sizeof(device->chipset_id); - - anv_device_get_cache_uuid(p); - p += VK_UUID_SIZE; + header = p; + header->header_size = sizeof(*header); + header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; + header->vendor_id = 0x8086; + header->device_id = device->chipset_id; + anv_device_get_cache_uuid(header->uuid); + p += header->header_size; struct cache_entry *entry; for (uint32_t i = 0; i < cache->table_size; i++) { -- cgit v1.2.3 From c028ffea7085297ea21d565dbc3913162ab70635 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:48:31 -0800 Subject: anv: Serialize as much pipeline cache as we can We can serialize as much as the application asks for and just stop once we run out of memory. This lets applications use a fixed amount of space for caching and still get some benefit. --- src/intel/vulkan/anv_pipeline_cache.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index fa41637d2c0..932baddb83a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -77,6 +77,16 @@ struct cache_entry { /* kernel follows prog_data at next 64 byte aligned address */ }; +static uint32_t +entry_size(struct cache_entry *entry) +{ + /* This returns the number of bytes needed to serialize an entry, which + * doesn't include the alignment padding bytes. 
+ */ + + return sizeof(*entry) + entry->prog_data_size + entry->kernel_size; +} + void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, struct anv_shader_module *module, @@ -146,10 +156,7 @@ anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, } } - /* We don't include the alignment padding bytes when we serialize, so - * don't include taht in the the total size. */ - cache->total_size += - sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + cache->total_size += entry_size(entry); cache->kernel_count++; } @@ -345,12 +352,12 @@ VkResult anv_GetPipelineCacheData( return VK_SUCCESS; } - if (*pDataSize < size) { + if (*pDataSize < sizeof(*header)) { *pDataSize = 0; return VK_INCOMPLETE; } - void *p = pData; + void *p = pData, *end = pData + *pDataSize; header = p; header->header_size = sizeof(*header); header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; @@ -365,6 +372,8 @@ VkResult anv_GetPipelineCacheData( continue; entry = cache->program_stream.block_pool->map + cache->table[i]; + if (end < p + entry_size(entry)) + break; memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); p += sizeof(*entry) + entry->prog_data_size; @@ -376,6 +385,8 @@ VkResult anv_GetPipelineCacheData( p += entry->kernel_size; } + *pDataSize = p - pData; + return VK_SUCCESS; } -- cgit v1.2.3 From 37c5e7025333fed2943630fa94e59ef2d413030b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 3 Mar 2016 16:52:20 -0800 Subject: anv: Rename 'table' to 'hash_table' in anv_pipeline_cache A little less ambiguous. --- src/intel/vulkan/anv_pipeline_cache.c | 32 ++++++++++++++++---------------- src/intel/vulkan/anv_private.h | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 932baddb83a..48f36706fef 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -49,15 +49,15 @@ anv_pipeline_cache_init(struct anv_pipeline_cache *cache, cache->kernel_count = 0; cache->total_size = 0; cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->table[0]); - cache->table = malloc(byte_size); + const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); + cache->hash_table = malloc(byte_size); /* We don't consider allocation failure fatal, we just start with a 0-sized * cache. 
*/ - if (cache->table == NULL) + if (cache->hash_table == NULL) cache->table_size = 0; else - memset(cache->table, 0xff, byte_size); + memset(cache->hash_table, 0xff, byte_size); } void @@ -65,7 +65,7 @@ anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) { anv_state_stream_finish(&cache->program_stream); pthread_mutex_destroy(&cache->mutex); - free(cache->table); + free(cache->hash_table); } struct cache_entry { @@ -117,7 +117,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, for (uint32_t i = 0; i < cache->table_size; i++) { const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->table[index]; + const uint32_t offset = cache->hash_table[index]; if (offset == ~0) return NO_KERNEL; @@ -150,8 +150,8 @@ anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, for (uint32_t i = 0; i < cache->table_size; i++) { const uint32_t index = (start + i) & mask; - if (cache->table[index] == ~0) { - cache->table[index] = entry_offset; + if (cache->hash_table[index] == ~0) { + cache->hash_table[index] = entry_offset; break; } } @@ -165,20 +165,20 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) { const uint32_t table_size = cache->table_size * 2; const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->table[0]); + const size_t byte_size = table_size * sizeof(cache->hash_table[0]); uint32_t *table; - uint32_t *old_table = cache->table; + uint32_t *old_table = cache->hash_table; table = malloc(byte_size); if (table == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; - cache->table = table; + cache->hash_table = table; cache->table_size = table_size; cache->kernel_count = 0; cache->total_size = 0; - memset(cache->table, 0xff, byte_size); + memset(cache->hash_table, 0xff, byte_size); for (uint32_t i = 0; i < old_table_size; i++) { const uint32_t offset = old_table[i]; if (offset == ~0) @@ -368,10 +368,10 @@ VkResult anv_GetPipelineCacheData( struct cache_entry *entry; for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->table[i] == ~0) + if (cache->hash_table[i] == ~0) continue; - entry = cache->program_stream.block_pool->map + cache->table[i]; + entry = cache->program_stream.block_pool->map + cache->hash_table[i]; if (end < p + entry_size(entry)) break; @@ -395,11 +395,11 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct anv_pipeline_cache *src) { for (uint32_t i = 0; i < src->table_size; i++) { - if (src->table[i] == ~0) + if (src->hash_table[i] == ~0) continue; struct cache_entry *entry = - src->program_stream.block_pool->map + src->table[i]; + src->program_stream.block_pool->map + src->hash_table[i]; if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) continue; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f87270466ae..32c8b13c952 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -633,7 +633,7 @@ struct anv_pipeline_cache { uint32_t total_size; uint32_t table_size; uint32_t kernel_count; - uint32_t *table; + uint32_t * hash_table; }; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, -- cgit v1.2.3 From 2b29342fae14d8626ca58f8a7ec358b70886ced3 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 08:15:16 -0800 Subject: anv: Store prog data in pipeline cache stream We have to keep it there for the cache to work, so let's not have an extra copy in struct anv_pipeline too. 
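The entries this relies on already carry the prog_data inline: each cache entry is a small header, then the serialized prog_data, then the kernel at the next 64-byte boundary, so a pipeline can keep a const pointer into the cache's program stream instead of owning a copy. A minimal sketch of that offset arithmetic, using trimmed stand-in types rather than the driver's real definitions:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the driver's align_u32(): round v up to a multiple of a,
 * where a is a power of two. */
static uint32_t
align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

/* Trimmed stand-in for struct cache_entry; prog_data bytes follow the
 * header, and the kernel starts at the next 64-byte boundary. */
struct cache_entry_sketch {
   unsigned char sha1[20];
   uint32_t      prog_data_size;
   uint32_t      kernel_size;
};

int main(void)
{
   const uint32_t prog_data_size = 148;   /* made-up example size */
   const uint32_t kernel_offset =
      align_u32((uint32_t)sizeof(struct cache_entry_sketch) + prog_data_size, 64);

   /* A pipeline borrows the prog_data pointer (right after the header)
    * and never frees it; the program stream owns the memory. */
   printf("prog_data at +%zu, kernel at +%u\n",
          sizeof(struct cache_entry_sketch), kernel_offset);
   return 0;
}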
--- src/intel/vulkan/anv_cmd_buffer.c | 6 +- src/intel/vulkan/anv_pipeline.c | 104 +++++++++++++++++++--------------- src/intel/vulkan/anv_pipeline_cache.c | 39 +++++++------ src/intel/vulkan/anv_private.h | 35 +++++++++--- src/intel/vulkan/gen7_cmd_buffer.c | 5 +- src/intel/vulkan/gen7_pipeline.c | 12 ++-- src/intel/vulkan/gen8_cmd_buffer.c | 5 +- src/intel/vulkan/gen8_pipeline.c | 30 +++++----- src/intel/vulkan/genX_cmd_buffer.c | 20 +++---- src/intel/vulkan/genX_pipeline.c | 2 +- src/intel/vulkan/genX_pipeline_util.h | 36 +++++++----- 11 files changed, 171 insertions(+), 123 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 6ff5f35bc6a..5ec242fbf2a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } if (stage == MESA_SHADER_COMPUTE && - cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) { struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; @@ -996,7 +996,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, { struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; - struct brw_stage_prog_data *prog_data = + const struct brw_stage_prog_data *prog_data = cmd_buffer->state.pipeline->prog_data[stage]; /* If we don't actually have any push constants, bail. */ @@ -1027,7 +1027,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_push_constants *data = cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index c93b1a07246..868215cd22d 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -406,7 +406,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - struct brw_stage_prog_data *prog_data) + const struct brw_stage_prog_data *prog_data) { struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { @@ -436,7 +436,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_vs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -446,17 +446,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_vs_prog_data prog_data = { 0, }; nir_shader *nir = 
anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_VERTEX, spec_info, - &prog_data->base.base); + &prog_data.base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -465,31 +465,36 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->nir == NULL) ralloc_steal(mem_ctx, nir); - prog_data->inputs_read = nir->info.inputs_read; + prog_data.inputs_read = nir->info.inputs_read; if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, + &prog_data.base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = - brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, false, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, + sizeof(prog_data)); ralloc_free(mem_ctx); } - if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + const struct brw_vs_prog_data *vs_prog_data = + (const struct brw_vs_prog_data *) stage_prog_data; + + if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { pipeline->vs_simd8 = kernel; pipeline->vs_vec4 = NO_KERNEL; } else { @@ -498,7 +503,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - &prog_data->base.base); + stage_prog_data); return VK_SUCCESS; } @@ -513,7 +518,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_gs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -523,17 +528,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_gs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_GEOMETRY, spec_info, - &prog_data->base.base); + &prog_data.base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -546,13 +551,13 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, + &prog_data.base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = - brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); @@ -560,9 +565,10 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, } /* TODO: SIMD8 GS */ + stage_prog_data = &prog_data.base.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + 
&stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } @@ -570,7 +576,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->gs_kernel = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - &prog_data->base.base); + stage_prog_data); return VK_SUCCESS; } @@ -586,7 +592,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_wm_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -599,19 +605,19 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_wm_prog_data prog_data = { 0, }; - prog_data->binding_table.render_target_start = 0; + prog_data.binding_table.render_target_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_FRAGMENT, spec_info, - &prog_data->base); + &prog_data.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -635,27 +641,31 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = - brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } - if (prog_data->no_8) + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *) stage_prog_data; + + if (wm_prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else pipeline->ps_simd8 = kernel; - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; + if (wm_prog_data->no_8 || wm_prog_data->prog_offset_16) { + pipeline->ps_simd16 = kernel + wm_prog_data->prog_offset_16; } else { pipeline->ps_simd16 = NO_KERNEL; } @@ -664,18 +674,18 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->ps_grf_start2 = 0; if (pipeline->ps_simd8 != NO_KERNEL) { pipeline->ps_ksp0 = pipeline->ps_simd8; - pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; if (pipeline->ps_simd16 != NO_KERNEL) { pipeline->ps_ksp2 = pipeline->ps_simd16; - pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; } } else if (pipeline->ps_simd16 != NO_KERNEL) { pipeline->ps_ksp0 = pipeline->ps_simd16; - pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - &prog_data->base); + stage_prog_data); return VK_SUCCESS; } @@ -690,7 +700,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, { const struct 
brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_cs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -700,23 +710,23 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_cs_prog_data prog_data = { 0, }; - prog_data->binding_table.work_groups_start = 0; + prog_data.binding_table.work_groups_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_COMPUTE, spec_info, - &prog_data->base); + &prog_data.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - prog_data->base.total_shared = nir->num_shared; + prog_data.base.total_shared = nir->num_shared; void *mem_ctx = ralloc_context(NULL); @@ -725,23 +735,24 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = - brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } pipeline->cs_simd = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - &prog_data->base); + stage_prog_data); return VK_SUCCESS; } @@ -751,10 +762,12 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) { const struct brw_device_info *devinfo = &pipeline->device->info; bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_size = vs_present ? + get_vs_prog_data(pipeline)->base.urb_entry_size : 1; unsigned vs_entry_size_bytes = vs_size * 64; bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; - unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_size = gs_present ? 
+ get_gs_prog_data(pipeline)->base.urb_entry_size : 1; unsigned gs_entry_size_bytes = gs_size * 64; /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): @@ -1136,7 +1149,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { /* Vertex is only optional if disable_vs is set */ assert(extra->disable_vs); - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); } gen7_compute_urb_partition(pipeline); @@ -1152,7 +1164,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, */ inputs_read = ~0ull; } else { - inputs_read = pipeline->vs_prog_data.inputs_read; + inputs_read = get_vs_prog_data(pipeline)->inputs_read; } pipeline->vb_used = 0; diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 48f36706fef..024fdf7d5a9 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -110,7 +110,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data) + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -126,7 +127,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, cache->program_stream.block_pool->map + offset; if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { if (prog_data) - memcpy(prog_data, entry->prog_data, entry->prog_data_size); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; const uint32_t preamble_size = align_u32(sizeof(*entry) + entry->prog_data_size, 64); @@ -198,17 +199,14 @@ uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, - const void *prog_data, size_t prog_data_size) + const struct brw_stage_prog_data **prog_data, + size_t prog_data_size) { pthread_mutex_lock(&cache->mutex); struct cache_entry *entry; - /* Meta pipelines don't have SPIR-V, so we can't hash them. - * Consequentally, they just don't get cached. - */ - const uint32_t preamble_size = sha1 ? 
- align_u32(sizeof(*entry) + prog_data_size, 64) : - 0; + const uint32_t preamble_size = + align_u32(sizeof(*entry) + prog_data_size, 64); const uint32_t size = preamble_size + kernel_size; @@ -216,14 +214,16 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const struct anv_state state = anv_state_stream_alloc(&cache->program_stream, size, 64); + entry = state.map; + entry->prog_data_size = prog_data_size; + memcpy(entry->prog_data, *prog_data, prog_data_size); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + entry->kernel_size = kernel_size; + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); - entry = state.map; - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, prog_data, prog_data_size); - entry->kernel_size = kernel_size; + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); if (cache->kernel_count == cache->table_size / 2) anv_pipeline_cache_grow(cache); @@ -285,9 +285,13 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const struct cache_entry *entry = p; const void *kernel = &entry->prog_data[entry->prog_data_size]; + const struct brw_stage_prog_data *prog_data = + (const struct brw_stage_prog_data *) entry->prog_data; + anv_pipeline_cache_upload_kernel(cache, entry->sha1, kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); + &prog_data, + entry->prog_data_size); p = kernel + entry->kernel_size; } } @@ -406,9 +410,12 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, const void *kernel = (void *) entry + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + const struct brw_stage_prog_data *prog_data = + (const struct brw_stage_prog_data *) entry->prog_data; + anv_pipeline_cache_upload_kernel(dst, entry->sha1, kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); + &prog_data, entry->prog_data_size); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 32c8b13c952..70b6dd995a1 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -640,12 +640,13 @@ void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data); + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data); uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, - const void *prog_data, + const struct brw_stage_prog_data **prog_data, size_t prog_data_size); struct anv_device { @@ -1404,12 +1405,8 @@ struct anv_pipeline { bool use_repclear; - struct brw_vs_prog_data vs_prog_data; - struct brw_wm_prog_data wm_prog_data; - struct brw_gs_prog_data gs_prog_data; - struct brw_cs_prog_data cs_prog_data; bool writes_point_size; - struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { @@ -1457,6 +1454,30 @@ struct anv_pipeline { } gen9; }; +static inline const struct brw_vs_prog_data * +get_vs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX]; +} + +static inline const struct 
brw_gs_prog_data * +get_gs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY]; +} + +static inline const struct brw_wm_prog_data * +get_wm_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT]; +} + +static inline const struct brw_cs_prog_data * +get_cs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE]; +} + struct anv_graphics_pipeline_create_info { /** * If non-negative, overrides the color attachment count of the pipeline's diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 985907872fa..8dce586eec7 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -283,7 +283,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -395,11 +395,12 @@ void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + bool needs_slm = cs_prog_data->base.total_shared > 0; config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5235d399ce5..5f480edf809 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -250,7 +250,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = 0xff); - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); #if 0 /* From gen7_vs_state.c */ @@ -277,18 +277,18 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = pipeline->vs_vec4, .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base), .DispatchGRFStartRegisterforURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + vs_prog_data->base.base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length, .VertexURBEntryReadOffset = 0, .MaximumNumberofThreads = device->info.max_vs_threads - 1, .StatisticsEnable = true, .VSFunctionEnable = true); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); @@ -338,7 +338,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .MaximumNumberofThreads = 
device->info.max_wm_threads - 1); } else { - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) anv_finishme("two-sided color needs sbe swizzling setup"); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8d8775fb01d..0d27c27f5b7 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -505,7 +505,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -558,11 +558,12 @@ void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + bool needs_slm = cs_prog_data->base.total_shared > 0; config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 4a5e8674189..5ce1307f090 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -354,6 +354,7 @@ genX(graphics_pipeline_create)( .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .LineEndCapAntialiasingRegionWidth = _05pixels, @@ -363,15 +364,15 @@ genX(graphics_pipeline_create)( .PointRasterizationRule = RASTRULE_UPPER_RIGHT, .BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ? - 0 : pipeline->wm_prog_data.barycentric_interp_modes); + 0 : wm_prog_data->barycentric_interp_modes); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_kernel == NO_KERNEL) + if (pipeline->gs_kernel == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); - else + } else { + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .SingleProgramFlow = false, .KernelStartPointer = pipeline->gs_kernel, @@ -412,11 +413,12 @@ genX(graphics_pipeline_create)( .VertexURBEntryOutputReadOffset = offset, .VertexURBEntryOutputLength = length); + } - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); /* Skip the VUE header and position slots */ offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? 
pipeline->vs_simd8 : pipeline->vs_vec4; @@ -435,7 +437,7 @@ genX(graphics_pipeline_create)( .VectorMaskEnable = false, .SamplerCount = 0, .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, + vs_prog_data->base.base.binding_table.size_bytes / 4, .ThreadDispatchPriority = false, .FloatingPointMode = IEEE754, .IllegalOpcodeExceptionEnable = false, @@ -443,11 +445,11 @@ genX(graphics_pipeline_create)( .SoftwareExceptionEnable = false, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base), .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + vs_prog_data->base.base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length, .VertexURBEntryReadOffset = 0, .MaximumNumberofThreads = device->info.max_vs_threads - 1, @@ -461,8 +463,6 @@ genX(graphics_pipeline_create)( .UserClipDistanceClipTestEnableBitmask = 0, .UserClipDistanceCullTestEnableBitmask = 0); - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - const int num_thread_bias = GEN_GEN == 8 ? 2 : 1; if (pipeline->ps_ksp0 == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 88cc13b580a..2606a66f2a7 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -319,11 +319,11 @@ void genX(CmdDraw)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), @@ -346,11 +346,11 @@ void genX(CmdDrawIndexed)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), @@ -398,13 +398,13 @@ void genX(CmdDrawIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); @@ -429,14 
+429,14 @@ void genX(CmdDrawIndexedIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; genX(cmd_buffer_flush_state)(cmd_buffer); /* TODO: We need to stomp base vertex to 0 somehow */ - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); @@ -460,7 +460,7 @@ void genX(CmdDispatch)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); if (prog_data->uses_num_work_groups) { struct anv_state state = @@ -507,7 +507,7 @@ void genX(CmdDispatchIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; struct anv_batch *batch = &cmd_buffer->batch; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 1605661f971..cc8841ea8a0 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -82,7 +82,7 @@ genX(compute_pipeline_create)( pipeline->use_repclear = false; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 28b2a032c46..cd138dfae61 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -52,6 +52,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + uint32_t elements; if (extra && extra->disable_vs) { /* If the VS is disabled, just assume the user knows what they're @@ -63,7 +65,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, elements |= (1 << info->pVertexAttributeDescriptions[i].location); } else { /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + uint64_t inputs_read = vs_prog_data->inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); elements = inputs_read >> VERT_ATTRIB_GENERIC0; } @@ -72,16 +74,16 @@ emit_vertex_input(struct anv_pipeline *pipeline, /* On BDW+, we only need to allocate space for base ids. Setting up * the actual vertex and instance id is a separate packet. 
*/ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; + const bool needs_svgs_elem = vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; #else /* On Haswell and prior, vertex and instance id are created by using the * ComponentControl fields, so we need an element for any of them. */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid || - pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; + const bool needs_svgs_elem = vs_prog_data->uses_vertexid || + vs_prog_data->uses_instanceid || + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; #endif uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; @@ -148,8 +150,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, * This means, that if we have BaseInstance, we need BaseVertex as * well. Just do all or nothing. */ - uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance) ? + uint32_t base_ctrl = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0; struct GENX(VERTEX_ELEMENT_STATE) element = { @@ -171,10 +173,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, #if GEN_GEN >= 8 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDEnable = vs_prog_data->uses_vertexid, .VertexIDComponentNumber = 2, .VertexIDElementOffset = id_slot, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDEnable = vs_prog_data->uses_instanceid, .InstanceIDComponentNumber = 3, .InstanceIDElementOffset = id_slot); #endif @@ -222,17 +224,21 @@ emit_urb_setup(struct anv_pipeline *pipeline) static void emit_3dstate_sbe(struct anv_pipeline *pipeline) { + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &pipeline->vs_prog_data.base.vue_map; + fs_input_map = &vs_prog_data->base.vue_map; else - fs_input_map = &pipeline->gs_prog_data.base.vue_map; + fs_input_map = &gs_prog_data->base.vue_map; struct GENX(3DSTATE_SBE) sbe = { GENX(3DSTATE_SBE_header), .AttributeSwizzleEnable = true, .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, #if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, @@ -283,7 +289,7 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) int max_source_attr = 0; for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = pipeline->wm_prog_data.urb_setup[attr]; + int input_index = wm_prog_data->urb_setup[attr]; if (input_index < 0) continue; -- cgit v1.2.3 From 87967a2c854c200ba8a7cabe1fe3f7e19291f187 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 10:59:21 -0800 Subject: anv: Simplify pipeline cache control flow a bit No functional change, but the control flow around searching the cache and falling back to compiling is a bit simpler. 
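[Editor's sketch] To make the simplification concrete, here is a minimal C sketch of the lookup-or-compile flow this commit converges on. It condenses the hunks below rather than quoting the driver verbatim; the compile step is elided and helper signatures are abbreviated to the forms visible in the patch.

    uint32_t kernel = NO_KERNEL;
    unsigned char sha1[20];

    /* Hash and probe the cache only when there is SPIR-V to hash. */
    if (module->size > 0) {
       anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
       kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
    }

    /* One test now covers both "nothing to hash" and "cache miss". */
    if (kernel == NO_KERNEL) {
       /* ... compile NIR with brw_compile_*() as before ... */
       kernel = anv_pipeline_cache_upload_kernel(cache,
                                                 module->size > 0 ? sha1 : NULL,
                                                 shader_code, code_size,
                                                 &stage_prog_data,
                                                 sizeof(prog_data));
    }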
--- src/intel/vulkan/anv_pipeline.c | 62 ++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 35 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 868215cd22d..6a0b48b620c 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -438,20 +438,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_vs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_vs_prog_key(&pipeline->device->info, &key); if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (kernel == NO_KERNEL) { struct brw_vs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, @@ -484,7 +481,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); @@ -520,20 +518,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_gs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_gs_prog_key(&pipeline->device->info, &key); if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (kernel == NO_KERNEL) { struct brw_gs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, @@ -566,7 +561,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, /* TODO: SIMD8 GS */ stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? 
sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); @@ -594,8 +590,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_wm_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_wm_prog_key(&pipeline->device->info, info, extra, &key); @@ -603,14 +599,11 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, key.nr_color_regions = 1; if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (kernel == NO_KERNEL) { struct brw_wm_prog_data prog_data = { 0, }; prog_data.binding_table.render_target_start = 0; @@ -649,7 +642,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, } stage_prog_data = &prog_data.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); @@ -702,17 +696,14 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; struct brw_cs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; + uint32_t kernel = NO_KERNEL; + unsigned char sha1[20]; populate_cs_prog_key(&pipeline->device->info, &key); if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); - } else { - hash = NULL; + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); } if (module->size == 0 || kernel == NO_KERNEL) { @@ -743,7 +734,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, } stage_prog_data = &prog_data.base; - kernel = anv_pipeline_cache_upload_kernel(cache, hash, + kernel = anv_pipeline_cache_upload_kernel(cache, + module->size > 0 ? sha1 : NULL, shader_code, code_size, &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); -- cgit v1.2.3 From 07441c344c845bd663398529dbf484759d09cd54 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:21:43 -0800 Subject: anv: Rename anv_pipeline_cache_add_entry() to 'set' This function is a helper that unconditionally sets a hash table entry and expects the cache to have enough room. Calling it 'add_entry' suggests it will grow the cache as needed. 
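[Editor's sketch] For reference, a reconstruction of the set/add split that this commit and the next one arrive at. The set_entry probe loop is an assumption modeled on the probing scheme visible in anv_pipeline_cache_search() (linear probing over a power-of-two table of program-stream offsets); the add_entry body matches the hunk shown in the following commit.

    /* Unconditionally writes the entry into the open-addressed hash table;
     * the caller must have ensured there is room (probe loop assumed to
     * mirror the search path). */
    static void
    anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
                                 struct cache_entry *entry,
                                 uint32_t entry_offset)
    {
       const uint32_t mask = cache->table_size - 1;
       const uint32_t start = *(uint32_t *) entry->sha1;

       for (uint32_t i = 0; i < cache->table_size; i++) {
          const uint32_t index = (start + i) & mask;
          if (cache->hash_table[index] == ~0) {
             cache->hash_table[index] = entry_offset;
             break;
          }
       }

       /* Bookkeeping as in the driver at this point in the series. */
       cache->total_size += entry_size(entry);
       cache->kernel_count++;
    }

    /* Grows first if the table is half full, then delegates. */
    static void
    anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
                                 struct cache_entry *entry,
                                 uint32_t entry_offset)
    {
       if (cache->kernel_count == cache->table_size / 2)
          anv_pipeline_cache_grow(cache);
       if (cache->kernel_count < cache->table_size / 2)
          anv_pipeline_cache_set_entry(cache, entry, entry_offset);
    }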
--- src/intel/vulkan/anv_pipeline_cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 024fdf7d5a9..0b260528f81 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -140,7 +140,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, } static void -anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, +anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, struct cache_entry *entry, uint32_t entry_offset) { const uint32_t mask = cache->table_size - 1; @@ -187,7 +187,7 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) struct cache_entry *entry = cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_add_entry(cache, entry, offset); + anv_pipeline_cache_set_entry(cache, entry, offset); } free(old_table); @@ -231,7 +231,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, * have enough space to add this new kernel. Only add it if there's room. */ if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_add_entry(cache, entry, state.offset); + anv_pipeline_cache_set_entry(cache, entry, state.offset); } pthread_mutex_unlock(&cache->mutex); -- cgit v1.2.3 From 626559ed3717a205c1315040caa4308e77c70eb5 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:25:23 -0800 Subject: anv: Add anv_pipeline_cache_add_entry() This function will grow the cache to make room and then add the entry. --- src/intel/vulkan/anv_pipeline_cache.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 0b260528f81..c85916fd1f8 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -195,6 +195,20 @@ anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) return VK_SUCCESS; } +static void +anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, + struct cache_entry *entry, uint32_t entry_offset) +{ + if (cache->kernel_count == cache->table_size / 2) + anv_pipeline_cache_grow(cache); + + /* Failing to grow that hash table isn't fatal, but may mean we don't + * have enough space to add this new kernel. Only add it if there's room. + */ + if (cache->kernel_count < cache->table_size / 2) + anv_pipeline_cache_set_entry(cache, entry, entry_offset); +} + uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, @@ -224,14 +238,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. 
- */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_set_entry(cache, entry, state.offset); + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); + anv_pipeline_cache_add_entry(cache, entry, state.offset); } pthread_mutex_unlock(&cache->mutex); -- cgit v1.2.3 From 584f39c65ed24d6c331d8ccf05d93678f3fafe16 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:27:31 -0800 Subject: anv: Don't re-upload shaders when merging Using anv_pipeline_cache_upload_kernel() will re-upload the kernel and prog_data when we merge caches. Since the kernel and prog_data are already in the program_stream, use anv_pipeline_cache_add_entry() instead to only add the entry to the hash table. --- src/intel/vulkan/anv_pipeline_cache.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index c85916fd1f8..376cd2a7716 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -406,23 +406,17 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct anv_pipeline_cache *src) { for (uint32_t i = 0; i < src->table_size; i++) { - if (src->hash_table[i] == ~0) + const uint32_t offset = src->hash_table[i]; + if (offset == ~0) continue; struct cache_entry *entry = - src->program_stream.block_pool->map + src->hash_table[i]; + src->program_stream.block_pool->map + offset; if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) continue; - const void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - const struct brw_stage_prog_data *prog_data = - (const struct brw_stage_prog_data *) entry->prog_data; - - anv_pipeline_cache_upload_kernel(dst, entry->sha1, - kernel, entry->kernel_size, - &prog_data, entry->prog_data_size); + anv_pipeline_cache_add_entry(dst, entry, offset); } } -- cgit v1.2.3 From 6139fe9a7790e0946e465f275d3f530552edbcdc Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 12:56:14 -0800 Subject: anv: Also cache the struct anv_pipeline_binding maps This is state that we generate when compiling the shaders, and we need it for mapping resources from descriptor sets to binding table indices. 
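[Editor's sketch] The serialized entry layout this commit moves to, summarized from the cache_entry hunk further down in the patch. The field order is taken from the diff; the trailing-layout comment paraphrases it, and the exact padding is an assumption.

    struct cache_entry {
       unsigned char sha1[20];
       uint32_t prog_data_size;
       uint32_t kernel_size;
       uint32_t surface_count;
       uint32_t sampler_count;
       uint32_t image_count;

       char prog_data[0];
       /* prog_data is followed by surface_count and then sampler_count
        * struct anv_pipeline_binding records; the kernel itself starts at
        * the next 64-byte-aligned offset after that preamble. */
    };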
--- src/intel/vulkan/anv_nir.h | 3 +- src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 38 +++----- src/intel/vulkan/anv_pipeline.c | 84 +++++++++++------ src/intel/vulkan/anv_pipeline_cache.c | 112 ++++++++++++++++------- src/intel/vulkan/anv_private.h | 8 +- 5 files changed, 158 insertions(+), 87 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h index a7ea3eb0e28..606fd1c0565 100644 --- a/src/intel/vulkan/anv_nir.h +++ b/src/intel/vulkan/anv_nir.h @@ -37,7 +37,8 @@ void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, struct brw_stage_prog_data *prog_data); void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_shader *shader, - struct brw_stage_prog_data *prog_data); + struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map); #ifdef __cplusplus } diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index e745bf661ee..eeb9b97f554 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -253,7 +253,8 @@ setup_vec4_uniform_value(const union gl_constant_value **params, void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_shader *shader, - struct brw_stage_prog_data *prog_data) + struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map) { struct anv_pipeline_layout *layout = pipeline->layout; @@ -277,12 +278,6 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, nir_foreach_block(function->impl, get_used_bindings_block, &state); } - struct anv_pipeline_bind_map map = { - .surface_count = 0, - .sampler_count = 0, - .image_count = 0, - }; - for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; @@ -290,21 +285,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, BITSET_FOREACH_SET(b, _tmp, state.set[set].used, set_layout->binding_count) { if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) - map.surface_count += set_layout->binding[b].array_size; + map->surface_count += set_layout->binding[b].array_size; if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) - map.sampler_count += set_layout->binding[b].array_size; + map->sampler_count += set_layout->binding[b].array_size; if (set_layout->binding[b].stage[shader->stage].image_index >= 0) - map.image_count += set_layout->binding[b].array_size; + map->image_count += set_layout->binding[b].array_size; } } - map.surface_to_descriptor = - malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); - map.sampler_to_descriptor = - malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); - - pipeline->bindings[shader->stage] = map; - unsigned surface = 0; unsigned sampler = 0; unsigned image = 0; @@ -320,8 +308,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { state.set[set].surface_offsets[b] = surface; for (unsigned i = 0; i < array_size; i++) { - map.surface_to_descriptor[surface + i].set = set; - map.surface_to_descriptor[surface + i].offset = set_offset + i; + map->surface_to_descriptor[surface + i].set = set; + map->surface_to_descriptor[surface + i].offset = set_offset + i; } surface += array_size; } @@ -329,8 +317,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { 
state.set[set].sampler_offsets[b] = sampler; for (unsigned i = 0; i < array_size; i++) { - map.sampler_to_descriptor[sampler + i].set = set; - map.sampler_to_descriptor[sampler + i].offset = set_offset + i; + map->sampler_to_descriptor[sampler + i].set = set; + map->sampler_to_descriptor[sampler + i].offset = set_offset + i; } sampler += array_size; } @@ -351,8 +339,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } } - if (map.image_count > 0) { - assert(map.image_count <= MAX_IMAGES); + if (map->image_count > 0) { + assert(map->image_count <= MAX_IMAGES); nir_foreach_variable(var, &shader->uniforms) { if (glsl_type_is_image(var->type) || (glsl_type_is_array(var->type) && @@ -374,7 +362,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, const gl_constant_value **param = prog_data->param + (shader->num_uniforms / 4); const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < map.image_count; i++) { + for (uint32_t i = 0; i < map->image_count; i++) { setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, (const union gl_constant_value *)&image_param->surface_idx, 1); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, @@ -392,7 +380,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, image_param ++; } - shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; + shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4; } ralloc_free(mem_ctx); diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 6a0b48b620c..f3f5ecdf660 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -193,11 +193,6 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { - free(pipeline->bindings[s].surface_to_descriptor); - free(pipeline->bindings[s].sampler_to_descriptor); - } - anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) @@ -315,7 +310,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, const char *entrypoint, gl_shader_stage stage, const VkSpecializationInfo *spec_info, - struct brw_stage_prog_data *prog_data) + struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map) { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; @@ -368,7 +364,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ if (pipeline->layout) - anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); + anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map); /* All binding table offsets provided by apply_pipeline_layout() are * relative to the start of the bindint table (plus MAX_RTS for VS). 
@@ -406,7 +402,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - const struct brw_stage_prog_data *prog_data) + const struct brw_stage_prog_data *prog_data, + struct anv_pipeline_bind_map *map) { struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { @@ -424,6 +421,7 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, pipeline->total_scratch = align_u32(pipeline->total_scratch, 1024) + prog_data->total_scratch * max_threads[stage]; + pipeline->bindings[stage] = *map; } static VkResult @@ -437,6 +435,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_vs_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -445,15 +444,22 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (kernel == NO_KERNEL) { struct brw_vs_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; + + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_VERTEX, spec_info, - &prog_data.base.base); + &prog_data.base.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -484,8 +490,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? 
sha1 : NULL, shader_code, code_size, - &stage_prog_data, - sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); ralloc_free(mem_ctx); } @@ -501,7 +507,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } @@ -517,6 +523,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_gs_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -525,15 +532,22 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (kernel == NO_KERNEL) { struct brw_gs_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; + + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_GEOMETRY, spec_info, - &prog_data.base.base); + &prog_data.base.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -564,7 +578,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? sha1 : NULL, shader_code, code_size, - &stage_prog_data, sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); ralloc_free(mem_ctx); } @@ -572,7 +587,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->gs_kernel = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } @@ -589,6 +604,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_wm_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -600,17 +616,22 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (kernel == NO_KERNEL) { struct brw_wm_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; - prog_data.binding_table.render_target_start = 0; + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_FRAGMENT, spec_info, - &prog_data.base); + &prog_data.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -645,7 +666,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? 
sha1 : NULL, shader_code, code_size, - &stage_prog_data, sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); ralloc_free(mem_ctx); } @@ -679,7 +701,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } @@ -695,6 +717,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; const struct brw_stage_prog_data *stage_prog_data; + struct anv_pipeline_bind_map map; struct brw_cs_prog_key key; uint32_t kernel = NO_KERNEL; unsigned char sha1[20]; @@ -703,17 +726,22 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data); + kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); } if (module->size == 0 || kernel == NO_KERNEL) { struct brw_cs_prog_data prog_data = { 0, }; + struct anv_pipeline_binding surface_to_descriptor[256]; + struct anv_pipeline_binding sampler_to_descriptor[256]; - prog_data.binding_table.work_groups_start = 0; + map = (struct anv_pipeline_bind_map) { + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_COMPUTE, spec_info, - &prog_data.base); + &prog_data.base, &map); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -737,14 +765,16 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, kernel = anv_pipeline_cache_upload_kernel(cache, module->size > 0 ? sha1 : NULL, shader_code, code_size, - &stage_prog_data, sizeof(prog_data)); + &stage_prog_data, sizeof(prog_data), + &map); + ralloc_free(mem_ctx); } pipeline->cs_simd = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - stage_prog_data); + stage_prog_data, &map); return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 376cd2a7716..3d2429a4e2a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -72,6 +72,10 @@ struct cache_entry { unsigned char sha1[20]; uint32_t prog_data_size; uint32_t kernel_size; + uint32_t surface_count; + uint32_t sampler_count; + uint32_t image_count; + char prog_data[0]; /* kernel follows prog_data at next 64 byte aligned address */ @@ -84,7 +88,11 @@ entry_size(struct cache_entry *entry) * doesn't include the alignment padding bytes. 
*/ - return sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + const uint32_t map_size = + entry->surface_count * sizeof(struct anv_pipeline_binding) + + entry->sampler_count * sizeof(struct anv_pipeline_binding); + + return sizeof(*entry) + entry->prog_data_size + map_size; } void @@ -111,7 +119,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data) + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -126,13 +135,20 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, struct cache_entry *entry = cache->program_stream.block_pool->map + offset; if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; - - const uint32_t preamble_size = - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - - return offset + preamble_size; + if (prog_data) { + assert(map); + void *p = entry->prog_data; + *prog_data = p; + p += entry->prog_data_size; + map->surface_count = entry->surface_count; + map->sampler_count = entry->sampler_count; + map->image_count = entry->image_count; + map->surface_to_descriptor = p; + p += map->surface_count * sizeof(struct anv_pipeline_binding); + map->sampler_to_descriptor = p; + } + + return offset + align_u32(entry_size(entry), 64); } } @@ -157,7 +173,7 @@ anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, } } - cache->total_size += entry_size(entry); + cache->total_size += entry_size(entry) + entry->kernel_size; cache->kernel_count++; } @@ -214,13 +230,18 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, const struct brw_stage_prog_data **prog_data, - size_t prog_data_size) + size_t prog_data_size, + struct anv_pipeline_bind_map *map) { pthread_mutex_lock(&cache->mutex); struct cache_entry *entry; + const uint32_t map_size = + map->surface_count * sizeof(struct anv_pipeline_binding) + + map->sampler_count * sizeof(struct anv_pipeline_binding); + const uint32_t preamble_size = - align_u32(sizeof(*entry) + prog_data_size, 64); + align_u32(sizeof(*entry) + prog_data_size + map_size, 64); const uint32_t size = preamble_size + kernel_size; @@ -230,12 +251,26 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, entry = state.map; entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, *prog_data, prog_data_size); - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + entry->surface_count = map->surface_count; + entry->sampler_count = map->sampler_count; + entry->image_count = map->image_count; entry->kernel_size = kernel_size; + void *p = entry->prog_data; + memcpy(p, *prog_data, prog_data_size); + p += prog_data_size; + + memcpy(p, map->surface_to_descriptor, + map->surface_count * sizeof(struct anv_pipeline_binding)); + map->surface_to_descriptor = p; + p += map->surface_count * sizeof(struct anv_pipeline_binding); + + memcpy(p, map->sampler_to_descriptor, + map->sampler_count * sizeof(struct anv_pipeline_binding)); + map->sampler_to_descriptor = p; + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { - assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); + assert(anv_pipeline_cache_search(cache, sha1, 
NULL, NULL) == NO_KERNEL); memcpy(entry->sha1, sha1, sizeof(entry->sha1)); anv_pipeline_cache_add_entry(cache, entry, state.offset); @@ -248,6 +283,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, if (!cache->device->info.has_llc) anv_state_clflush(state); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + return state.offset + preamble_size; } @@ -282,23 +319,34 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) return; - const void *end = data + size; - const void *p = data + header.header_size; + void *end = (void *) data + size; + void *p = (void *) data + header.header_size; while (p < end) { - /* The kernels aren't 64 byte aligned in the serialized format so - * they're always right after the prog_data. - */ - const struct cache_entry *entry = p; - const void *kernel = &entry->prog_data[entry->prog_data_size]; - - const struct brw_stage_prog_data *prog_data = - (const struct brw_stage_prog_data *) entry->prog_data; + struct cache_entry *entry = p; + + void *data = entry->prog_data; + const struct brw_stage_prog_data *prog_data = data; + data += entry->prog_data_size; + + struct anv_pipeline_binding *surface_to_descriptor = data; + data += entry->surface_count * sizeof(struct anv_pipeline_binding); + struct anv_pipeline_binding *sampler_to_descriptor = data; + data += entry->sampler_count * sizeof(struct anv_pipeline_binding); + void *kernel = data; + + struct anv_pipeline_bind_map map = { + .surface_count = entry->surface_count, + .sampler_count = entry->sampler_count, + .image_count = entry->image_count, + .surface_to_descriptor = surface_to_descriptor, + .sampler_to_descriptor = sampler_to_descriptor + }; anv_pipeline_cache_upload_kernel(cache, entry->sha1, kernel, entry->kernel_size, &prog_data, - entry->prog_data_size); + entry->prog_data_size, &map); p = kernel + entry->kernel_size; } } @@ -383,14 +431,14 @@ VkResult anv_GetPipelineCacheData( continue; entry = cache->program_stream.block_pool->map + cache->hash_table[i]; - if (end < p + entry_size(entry)) + const uint32_t size = entry_size(entry); + if (end < p + size + entry->kernel_size) break; - memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); - p += sizeof(*entry) + entry->prog_data_size; + memcpy(p, entry, size); + p += size; - void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); + void *kernel = (void *) entry + align_u32(size, 64); memcpy(p, kernel, entry->kernel_size); p += entry->kernel_size; @@ -413,7 +461,7 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, struct cache_entry *entry = src->program_stream.block_pool->map + offset; - if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) + if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL) continue; anv_pipeline_cache_add_entry(dst, entry, offset); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 70b6dd995a1..b112b457b99 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -636,18 +636,22 @@ struct anv_pipeline_cache { uint32_t * hash_table; }; +struct anv_pipeline_bind_map; + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data); + const struct brw_stage_prog_data 
**prog_data, + struct anv_pipeline_bind_map *map); uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, const struct brw_stage_prog_data **prog_data, - size_t prog_data_size); + size_t prog_data_size, + struct anv_pipeline_bind_map *map); struct anv_device { VK_LOADER_DATA _loader_data; -- cgit v1.2.3 From 30bbe28b7efc7e6b6fef78ac3233bb7485679d1e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 22:07:02 -0800 Subject: anv: Always use point size from the shader There is no API for setting the point size and the shader is always required to set it. Section 24.4: "If the value written to PointSize is less than or equal to zero, or if no value was written to PointSize, results are undefined." As such, we can just always program PointWidthSource to Vertex. This simplifies anv_pipeline a bit and avoids trouble when we enable the pipeline cache and don't have writes_point_size in the prog_data. --- src/intel/vulkan/anv_pipeline.c | 6 ------ src/intel/vulkan/anv_private.h | 1 - src/intel/vulkan/gen7_pipeline.c | 2 +- src/intel/vulkan/gen8_pipeline.c | 2 +- 4 files changed, 2 insertions(+), 9 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index f3f5ecdf660..183589611a1 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -469,8 +469,6 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, ralloc_steal(mem_ctx, nir); prog_data.inputs_read = nir->info.inputs_read; - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, &prog_data.base.vue_map, @@ -556,9 +554,6 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->nir == NULL) ralloc_steal(mem_ctx, nir); - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; - brw_compute_vue_map(&pipeline->device->info, &prog_data.base.vue_map, nir->info.outputs_written, @@ -1122,7 +1117,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); pipeline->use_repclear = extra && extra->use_repclear; - pipeline->writes_point_size = false; /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index b112b457b99..8c3318816c6 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1409,7 +1409,6 @@ struct anv_pipeline { bool use_repclear; - bool writes_point_size; const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5f480edf809..37e4639b287 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -65,7 +65,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, /* uint32_t AALineDistanceMode; */ /* uint32_t VertexSubPixelPrecisionSelect; */ - .UsePointWidthState = !pipeline->writes_point_size, + .UsePointWidthState = false, .PointWidth = 1.0, .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 5ce1307f090..8edc1574ac3 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -60,7 +60,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, .TriangleFanProvokingVertexSelect = 1, - .PointWidthSource = pipeline->writes_point_size ? Vertex : State, + .PointWidthSource = Vertex, .PointWidth = 1.0, }; -- cgit v1.2.3 From f2b37132cb6a804b958d2e1dff17e7d77e430b96 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 5 Mar 2016 12:20:16 -0800 Subject: anv: Check if shader is present before uploading to cache Between the initial check that returns NO_KERNEL and compiling the shader, other threads may have added the shader to the cache. Before uploading the kernel, check again (under the mutex) that the compiled shader still isn't present.
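The pattern being added is double-checked locking around the cache. A minimal sketch of the same idea, with placeholder names (shader_cache, cache_search_unlocked(), and upload_locked() below are illustrative stand-ins, not the driver's API):

#include <pthread.h>
#include <stdint.h>
#include <stddef.h>

#define NO_KERNEL UINT32_MAX

struct shader_cache {
   pthread_mutex_t mutex;
   /* hash table of compiled kernels keyed by SHA-1, elided */
};

uint32_t cache_search_unlocked(struct shader_cache *cache,
                               const unsigned char *sha1);
uint32_t upload_locked(struct shader_cache *cache, const unsigned char *sha1,
                       const void *kernel, size_t size);

uint32_t
cache_upload_kernel(struct shader_cache *cache, const unsigned char *sha1,
                    const void *kernel, size_t size)
{
   pthread_mutex_lock(&cache->mutex);

   /* Another thread may have compiled and uploaded this shader between
    * our initial lock-free lookup (which returned NO_KERNEL) and now.
    * Re-check under the mutex and prefer the copy already in the cache. */
   uint32_t kernel_offset = cache_search_unlocked(cache, sha1);
   if (kernel_offset == NO_KERNEL)
      kernel_offset = upload_locked(cache, sha1, kernel, size);

   pthread_mutex_unlock(&cache->mutex);
   return kernel_offset;
}

The compile itself stays outside the lock; only the re-check and the upload are serialized, so the worst case is a redundant compilation, never a duplicate cache entry.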
--- src/intel/vulkan/anv_pipeline_cache.c | 45 +++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 3d2429a4e2a..f7a1e1c679a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -116,11 +116,11 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, _mesa_sha1_final(ctx, hash); } -uint32_t -anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) +static uint32_t +anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -152,7 +152,24 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, } } - return NO_KERNEL; + unreachable("hash table should never be full"); +} + +uint32_t +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data, + struct anv_pipeline_bind_map *map) +{ + uint32_t kernel; + + pthread_mutex_lock(&cache->mutex); + + kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + + pthread_mutex_unlock(&cache->mutex); + + return kernel; } static void @@ -234,6 +251,19 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, struct anv_pipeline_bind_map *map) { pthread_mutex_lock(&cache->mutex); + + /* Before uploading, check again that another thread didn't upload this + * shader while we were compiling it. + */ + if (sha1) { + uint32_t cached_kernel = + anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + if (cached_kernel != NO_KERNEL) { + pthread_mutex_unlock(&cache->mutex); + return cached_kernel; + } + } + struct cache_entry *entry; const uint32_t map_size = @@ -270,7 +300,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, map->sampler_to_descriptor = p; if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { - assert(anv_pipeline_cache_search(cache, sha1, NULL, NULL) == NO_KERNEL); + assert(anv_pipeline_cache_search_unlocked(cache, sha1, + NULL, NULL) == NO_KERNEL); memcpy(entry->sha1, sha1, sizeof(entry->sha1)); anv_pipeline_cache_add_entry(cache, entry, state.offset); -- cgit v1.2.3 From 34326f46dfe1511529363b4ab46477f04d3e1574 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 15:03:23 -0800 Subject: anv: Turn pipeline cache on by default Move the environment variable check to cache creation time so we block both lookups and uploads if it's turned off. --- src/intel/vulkan/anv_pipeline_cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index f7a1e1c679a..62dbe3eda74 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -54,7 +54,8 @@ anv_pipeline_cache_init(struct anv_pipeline_cache *cache, /* We don't consider allocation failure fatal, we just start with a 0-sized * cache. 
*/ - if (cache->hash_table == NULL) + if (cache->hash_table == NULL || + !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) cache->table_size = 0; else memset(cache->hash_table, 0xff, byte_size); @@ -299,7 +300,7 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, map->sampler_count * sizeof(struct anv_pipeline_binding)); map->sampler_to_descriptor = p; - if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { + if (sha1) { assert(anv_pipeline_cache_search_unlocked(cache, sha1, NULL, NULL) == NO_KERNEL); -- cgit v1.2.3 From 7b348ab8a0d38b504f659a0b4b6c8aca1a52ea6b Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sat, 5 Mar 2016 14:33:50 -0800 Subject: anv: Fix rebase error --- src/intel/vulkan/gen8_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 8edc1574ac3..8471fc733ba 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -333,6 +333,7 @@ genX(graphics_pipeline_create)( emit_urb_setup(pipeline); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .ClipEnable = true, .EarlyCullEnable = true, @@ -344,7 +345,7 @@ genX(graphics_pipeline_create)( REJECT_ALL : NORMAL, .NonPerspectiveBarycentricEnable = - (pipeline->wm_prog_data.barycentric_interp_modes & 0x38) != 0, + (wm_prog_data->barycentric_interp_modes & 0x38) != 0, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, @@ -354,7 +355,6 @@ genX(graphics_pipeline_create)( .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); - const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .LineEndCapAntialiasingRegionWidth = _05pixels, -- cgit v1.2.3 From 8502794c1232ea0654c879ce565fef72e3ab522d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 5 Mar 2016 14:42:16 -0800 Subject: anv/pipeline: Handle null wm_prog_data in 3DSTATE_CLIP --- src/intel/vulkan/gen8_pipeline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 8471fc733ba..10dd6457fbc 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -344,8 +344,8 @@ genX(graphics_pipeline_create)( pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? REJECT_ALL : NORMAL, - .NonPerspectiveBarycentricEnable = - (wm_prog_data->barycentric_interp_modes & 0x38) != 0, + .NonPerspectiveBarycentricEnable = wm_prog_data ? 
+ (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, -- cgit v1.2.3 From 21ee5fd3263e034a54d7a37d9e5b6e6f9ef49f54 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 26 Feb 2016 11:31:04 -0800 Subject: anv: Emit null render targets v2 (Francisco Jerez): Add the state_offset to the surface state offset --- src/intel/vulkan/anv_cmd_buffer.c | 52 ++++++++++++++++++++++++++++---------- src/intel/vulkan/anv_private.h | 13 ++++++++++ src/intel/vulkan/genX_cmd_buffer.c | 30 ++++++++++++++++++++++ 3 files changed, 82 insertions(+), 13 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5ec242fbf2a..9dca21d527a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -705,6 +705,26 @@ anv_format_for_descriptor_type(VkDescriptorType type) } } +static struct anv_state +anv_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + if (cmd_buffer->device->info.is_haswell) { + return gen75_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + } else { + return gen7_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + } + case 8: + return gen8_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + case 9: + return gen9_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + default: + unreachable("Invalid hardware generation"); + } +} + VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, @@ -713,27 +733,24 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_pipeline_bind_map *map; - uint32_t color_count, bias, state_offset; + uint32_t bias, state_offset; switch (stage) { case MESA_SHADER_FRAGMENT: map = &cmd_buffer->state.pipeline->bindings[stage]; bias = MAX_RTS; - color_count = subpass->color_count; break; case MESA_SHADER_COMPUTE: map = &cmd_buffer->state.compute_pipeline->bindings[stage]; bias = 1; - color_count = 0; break; default: map = &cmd_buffer->state.pipeline->bindings[stage]; bias = 0; - color_count = 0; break; } - if (color_count + map->surface_count == 0) { + if (bias + map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; } @@ -746,14 +763,23 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - for (uint32_t a = 0; a < color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); + if (stage == MESA_SHADER_FRAGMENT) { + if (subpass->color_count == 0) { + struct anv_state null_surface = + anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, + cmd_buffer->state.framebuffer); + bt_map[0] = null_surface.offset + state_offset; + } else { + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + assert(iview->color_rt_surface_state.alloc_size); + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + 
add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + } } if (stage == MESA_SHADER_COMPUTE && diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 8c3318816c6..7791bbc1649 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1299,6 +1299,19 @@ void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, const VkRenderPassBeginInfo *info); +struct anv_state +gen7_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); +struct anv_state +gen75_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); +struct anv_state +gen8_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); +struct anv_state +gen9_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); + void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2606a66f2a7..b969fab35bc 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -590,6 +590,36 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) } } +struct anv_state +genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb) +{ + struct anv_state state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + + struct GENX(RENDER_SURFACE_STATE) null_ss = { + .SurfaceType = SURFTYPE_NULL, + .SurfaceArray = fb->layers > 0, + .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM, +#if GEN_GEN >= 8 + .TileMode = YMAJOR, +#else + .TiledSurface = true, +#endif + .Width = fb->width - 1, + .Height = fb->height - 1, + .Depth = fb->layers - 1, + .RenderTargetViewExtent = fb->layers - 1, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state.map, &null_ss); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { -- cgit v1.2.3 From 23de78768b69d5600233df022431b8f26a0907fc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 6 Mar 2016 14:16:51 -0800 Subject: anv: Create fences from the batch BO pool Applications may create a *lot* of fences, perhaps as much as one per vkQueueSubmit. Really, they're supposed to use ResetFence, but it's easy enough for us to make them crazy-cheap so we might as well. 
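The win comes from replacing a per-fence GEM buffer creation and mmap with a pop from an already-mapped pool. Roughly, with simplified stand-in types (bo_pool_alloc() and bo_pool_free() here mirror the roles of anv_bo_pool_alloc() and anv_bo_pool_free() in the diff below):

#include <stdint.h>

struct bo { uint32_t gem_handle; uint64_t size; void *map; };
struct bo_pool { void *free_list; /* recycled, still-mapped BOs */ };

/* Pops a recycled BO if one is available; only allocates and maps a
 * fresh one on a pool miss. */
int bo_pool_alloc(struct bo_pool *pool, struct bo *bo);

/* Pushes the BO back onto the free list; no munmap, no GEM close. */
void bo_pool_free(struct bo_pool *pool, const struct bo *bo);

int
create_fence(struct bo_pool *pool, struct bo *fence_bo)
{
   int ret = bo_pool_alloc(pool, fence_bo);
   if (ret != 0)
      return ret;
   /* ... emit MI_BATCH_BUFFER_END into fence_bo->map ... */
   return 0;
}

In the steady state, vkCreateFence/vkDestroyFence then cost little more than a pair of free-list operations.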
--- src/intel/vulkan/anv_device.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index c68280fe8d7..8aa1e61acad 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1387,8 +1387,6 @@ VkResult anv_CreateFence( struct anv_batch batch; VkResult result; - const uint32_t fence_size = 128; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, @@ -1396,12 +1394,10 @@ VkResult anv_CreateFence( if (fence == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_init_new(&fence->bo, device, fence_size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence->bo); if (result != VK_SUCCESS) goto fail; - fence->bo.map = - anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); batch.next = batch.start = fence->bo.map; batch.end = fence->bo.map + fence->bo.size; anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); @@ -1457,9 +1453,7 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_fence, fence, _fence); - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, fence->bo.gem_handle); - anv_free2(&device->alloc, pAllocator, fence); + anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); } VkResult anv_ResetFences( -- cgit v1.2.3 From 32aa01663ff649a399480886106e203cc347c212 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Sun, 6 Mar 2016 22:06:24 -0800 Subject: anv: Quiet pTessellationState warning Some applications pass a dummy for pTessellationState, which results in a lot of noise. Only warn if we're actually given tessellation shader stages. --- src/intel/vulkan/anv_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 183589611a1..86831eae30e 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1113,9 +1113,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); - if (pCreateInfo->pTessellationState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - pipeline->use_repclear = extra && extra->use_repclear; /* When we free the pipeline, we detect stages based on the NULL status @@ -1148,6 +1145,9 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pStages[MESA_SHADER_VERTEX]->pSpecializationInfo); } + if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL]) + anv_finishme("no tessellation support"); + if (modules[MESA_SHADER_GEOMETRY]) { anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, modules[MESA_SHADER_GEOMETRY], -- cgit v1.2.3 From 428ffc9c13c24c30c317c2e985b9097956c583b0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 14:48:35 -0800 Subject: anv/device: Actually free the CPU-side fence struct again In 23de78768, when we switched from allocating individual BOs to using the pool for fences, we accidentally deleted the free.
--- src/intel/vulkan/anv_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 8aa1e61acad..816f780c6ff 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1454,6 +1454,7 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_fence, fence, _fence); anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); + anv_free2(&device->alloc, pAllocator, fence); } VkResult anv_ResetFences( -- cgit v1.2.3 From 181b142fbd176f24a73cabf209000a9187f275e8 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Sat, 5 Mar 2016 15:17:00 -0800 Subject: anv/device: Up device limits for 3D and array texture dimensions The limit for these textures is 2048, not 1024. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 816f780c6ff..44eb0ed2d6c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -421,9 +421,9 @@ void anv_GetPhysicalDeviceProperties( VkPhysicalDeviceLimits limits = { .maxImageDimension1D = (1 << 14), .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 10), + .maxImageDimension3D = (1 << 11), .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 10), + .maxImageArrayLayers = (1 << 11), .maxTexelBufferElements = 128 * 1024 * 1024, .maxUniformBufferRange = UINT32_MAX, .maxStorageBufferRange = UINT32_MAX, -- cgit v1.2.3 From 8c2b9d152941f49d956bb2775a48158d1d10253b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 14:56:58 -0800 Subject: anv/bo_pool: Allow freeing BOs where the anv_bo is in the BO itself --- src/intel/vulkan/anv_allocator.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 3b62bda3e93..d7c09103344 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -853,11 +853,13 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) } void -anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in) { - struct bo_pool_bo_link *link = bo->map; - link->bo = *bo; + /* Make a copy in case the anv_bo happens to be stored in the BO */ + struct anv_bo bo = *bo_in; + struct bo_pool_bo_link *link = bo.map; + link->bo = bo; - VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + VG(VALGRIND_MEMPOOL_FREE(pool, bo.map)); anv_ptr_free_list_push(&pool->free_list, link); } -- cgit v1.2.3 From 3d4f2b0927acaac05e87ed07ae492e39b4c82ff7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 21:22:46 -0800 Subject: anv/allocator: Move the alignment assert for the pointer free list Previously we asserted every time you tried to pack a pointer and a counter together. However, this wasn't really correct. In the case where you try to grab the last element of the list, the "next element" value you get may be bogus if someone else got there first. This was leading to assertion failures even though the allocator would safely fall through to the failure case below.
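The scheme in question packs a generation counter into the low 12 bits of a page-aligned pointer, so a compare-and-swap can tell "same pointer, same generation" apart from "same pointer, recycled" (the ABA problem). A self-contained sketch of the pack/unpack, using the hardened casts from the follow-up commit below:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff)
#define PFL_PTR(x)   ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff))
#define PFL_PACK(ptr, count) \
   ((void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)))

int
main(void)
{
   void *elem = aligned_alloc(4096, 4096); /* page-aligned element */
   if (!elem)
      return 1;

   /* Alignment can only be asserted on the push side: a racing pop may
    * read a stale "next" value with garbage low bits, and must be free
    * to fail its compare-and-swap and retry rather than assert. */
   assert(((uintptr_t)elem & 0xfff) == 0);

   void *packed = PFL_PACK(elem, 5);
   assert(PFL_PTR(packed) == elem);
   assert(PFL_COUNT(packed) == 5);

   free(elem);
   return 0;
}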
--- src/intel/vulkan/anv_allocator.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index d7c09103344..385c63f9945 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -200,7 +200,6 @@ anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) #define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) #define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) #define PFL_PACK(ptr, count) ({ \ - assert(((uintptr_t)(ptr) & 0xfff) == 0); \ (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ }) @@ -230,6 +229,12 @@ anv_ptr_free_list_push(void **list, void *elem) void *old, *current; void **next_ptr = elem; + /* The pointer-based free list requires that the pointer be + * page-aligned. This is because we use the bottom 12 bits of the + * pointer to store a counter to solve the ABA concurrency problem. + */ + assert(((uintptr_t)elem & 0xfff) == 0); + old = *list; do { current = old; -- cgit v1.2.3 From f61d40adc2a09221453b7a87880e134a5424773e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 21:27:55 -0800 Subject: anv/allocator: Better casting in PFL macros We cast the constant 0xfff values to a uintptr_t before applying a bitwise negate to ensure that they are actually 64-bit when needed. Also, the count variable doesn't need to be explicitly cast; it will get upcast as needed by the "|" operation. --- src/intel/vulkan/anv_allocator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 385c63f9945..4fc83386a71 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -198,9 +198,9 @@ anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) * means that the bottom 12 bits should all be zero. */ #define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) -#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff)) #define PFL_PACK(ptr, count) ({ \ - (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ + (void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)); \ }) static bool -- cgit v1.2.3 From 2308891edea4d8508d3e95f29c58b4089e96b5e7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 13:45:25 -0800 Subject: anv: Store CPU-side fence information in the BO This reduces the number of allocations a bit and cuts back on memory usage. Kind-of a micro-optimization but it also makes the error handling a bit simpler so it seems like a win.
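A reduced sketch of the resulting layout (types simplified; align_u32() and CACHELINE_SIZE mirror the driver's helpers, and the execbuf bookkeeping is elided):

#include <stdint.h>

#define CACHELINE_SIZE 64

struct bo { void *map; uint64_t size; };
struct fence { struct bo bo; /* execbuf state elided */ };

static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

struct fence *
fence_init(struct bo bo)
{
   /* The CPU-side struct lives at offset 0 of its own mapped BO... */
   struct fence *fence = bo.map;
   fence->bo = bo;

   /* ...and the GPU batch starts on the next cache line; the non-LLC
    * flush path in the diff below depends on this alignment. */
   uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
   void *batch_start = (char *)bo.map + batch_offset;
   (void)batch_start; /* MI_BATCH_BUFFER_END would be emitted here */

   return fence;
}

One pool allocation now backs both the fence object and its batch, which is why the fail: path disappears from anv_CreateFence in the diff below.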
--- src/intel/vulkan/anv_device.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 44eb0ed2d6c..768e2eb3be1 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1383,29 +1383,31 @@ VkResult anv_CreateFence( VkFence* pFence) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_bo fence_bo; struct anv_fence *fence; struct anv_batch batch; VkResult result; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (fence == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence->bo); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo); if (result != VK_SUCCESS) - goto fail; + return result; + + /* Fences are small. Just store the CPU data structure in the BO. */ + fence = fence_bo.map; + fence->bo = fence_bo; - batch.next = batch.start = fence->bo.map; + /* Place the batch after the CPU data but on its own cache line. */ + const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE); + batch.next = batch.start = fence->bo.map + batch_offset; batch.end = fence->bo.map + fence->bo.size; anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); anv_batch_emit(&batch, GEN7_MI_NOOP); if (!device->info.has_llc) { - assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); - assert(batch.next - fence->bo.map <= CACHELINE_SIZE); + assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); + assert(batch.next - batch.start <= CACHELINE_SIZE); __builtin_ia32_mfence(); __builtin_ia32_clflush(fence->bo.map); } @@ -1421,8 +1423,8 @@ VkResult anv_CreateFence( fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = 0; - fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.batch_start_offset = batch.start - fence->bo.map; + fence->execbuf.batch_len = batch.next - batch.start; fence->execbuf.cliprects_ptr = 0; fence->execbuf.num_cliprects = 0; fence->execbuf.DR1 = 0; @@ -1438,11 +1440,6 @@ VkResult anv_CreateFence( *pFence = anv_fence_to_handle(fence); return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, fence); - - return result; } void anv_DestroyFence( @@ -1453,8 +1450,8 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_fence, fence, _fence); + assert(fence->bo.map == fence); anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); - anv_free2(&device->alloc, pAllocator, fence); } VkResult anv_ResetFences( -- cgit v1.2.3 From 75af420cb1145f5fc34af6728047a2404b5f1add Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 18:07:48 -0800 Subject: anv/pipeline: Move binding table setup to its own helper --- src/intel/vulkan/anv_pipeline.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 86831eae30e..22af44d6020 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -366,27 +366,6 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (pipeline->layout) anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map); - /* All binding table offsets provided by 
apply_pipeline_layout() are - * relative to the start of the bindint table (plus MAX_RTS for VS). - */ - unsigned bias; - switch (stage) { - case MESA_SHADER_FRAGMENT: - bias = MAX_RTS; - break; - case MESA_SHADER_COMPUTE: - bias = 1; - break; - default: - bias = 0; - break; - } - prog_data->binding_table.size_bytes = 0; - prog_data->binding_table.texture_start = bias; - prog_data->binding_table.ubo_start = bias; - prog_data->binding_table.ssbo_start = bias; - prog_data->binding_table.image_start = bias; - /* Finish the optimization and compilation process */ if (nir->stage == MESA_SHADER_COMPUTE) brw_nir_lower_shared(nir); @@ -399,6 +378,16 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, return nir; } +static void +anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias) +{ + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; +} + static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, @@ -463,6 +452,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + anv_fill_binding_table(&prog_data.base.base, 0); + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) @@ -549,6 +540,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + anv_fill_binding_table(&prog_data.base.base, 0); + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) @@ -643,6 +636,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, } } + anv_fill_binding_table(&prog_data.base, MAX_RTS); + void *mem_ctx = ralloc_context(NULL); if (module->nir == NULL) @@ -740,6 +735,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + anv_fill_binding_table(&prog_data.base, 1); + prog_data.base.total_shared = nir->num_shared; void *mem_ctx = ralloc_context(NULL); -- cgit v1.2.3 From cce65471b8667e1752754c53361031cded5b39d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 7 Mar 2016 17:28:00 -0800 Subject: anv: Compact render targets Previously, we would always emit all of the render targets in the subpass. This commit changes it so that we compact render targets just like we do with other resources. Render targets are represented in the surface map by using a descriptor set index of UINT16_MAX. 
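What "compacted" means concretely: color attachments become ordinary entries at the front of the surface-to-descriptor map, tagged with the sentinel set index. A sketch, not the driver's actual tables (the sentinel matches the ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS definition in the diff below):

#include <stdint.h>

#define DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT16_MAX /* sentinel "set" */

struct pipeline_binding {
   uint16_t set;    /* real descriptor set, or the sentinel above */
   uint16_t offset; /* descriptor index, or color attachment index */
};

/* Surface map for a fragment shader that writes render targets 0 and 2
 * and reads one buffer: the RT entries are packed at the front of the
 * binding table, so the shader's unused RT 1 costs no slot. */
static const struct pipeline_binding surface_map[] = {
   { .set = DESCRIPTOR_SET_COLOR_ATTACHMENTS, .offset = 0 },
   { .set = DESCRIPTOR_SET_COLOR_ATTACHMENTS, .offset = 2 },
   { .set = 0, .offset = 3 }, /* ordinary descriptor: set 0, index 3 */
};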
--- src/intel/vulkan/anv_cmd_buffer.c | 52 +++++++++++++++++++-------------------- src/intel/vulkan/anv_pipeline.c | 48 ++++++++++++++++++++++++++++++++---- src/intel/vulkan/anv_private.h | 11 +++++++-- src/intel/vulkan/gen8_pipeline.c | 35 +++++++++++++++++++------- 4 files changed, 104 insertions(+), 42 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 9dca21d527a..ac8bf5fc619 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -736,10 +736,6 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t bias, state_offset; switch (stage) { - case MESA_SHADER_FRAGMENT: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = MAX_RTS; - break; case MESA_SHADER_COMPUTE: map = &cmd_buffer->state.compute_pipeline->bindings[stage]; bias = 1; @@ -763,25 +759,6 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - if (stage == MESA_SHADER_FRAGMENT) { - if (subpass->color_count == 0) { - struct anv_state null_surface = - anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, - cmd_buffer->state.framebuffer); - bt_map[0] = null_surface.offset + state_offset; - } else { - for (uint32_t a = 0; a < subpass->color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); - } - } - } - if (stage == MESA_SHADER_COMPUTE && get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) { struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; @@ -815,14 +792,37 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t image = 0; for (uint32_t s = 0; s < map->surface_count; s++) { struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; struct anv_state surface_state; struct anv_bo *bo; uint32_t bo_offset; + if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) { + /* Color attachment binding */ + assert(stage == MESA_SHADER_FRAGMENT); + if (binding->offset < subpass->color_count) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[binding->offset]]; + + assert(iview->color_rt_surface_state.alloc_size); + surface_state = iview->color_rt_surface_state; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } else { + /* Null render target */ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + surface_state = + anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb); + } + + bt_map[bias + s] = surface_state.offset + state_offset; + continue; + } + + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + switch (desc->type) { case VK_DESCRIPTOR_TYPE_SAMPLER: /* Nothing for us to do here */ diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 22af44d6020..abe93a50af8 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -599,9 +599,6 @@ anv_pipeline_compile_fs(struct anv_pipeline 
*pipeline, populate_wm_prog_key(&pipeline->device->info, info, extra, &key); - if (pipeline->use_repclear) - key.nr_color_regions = 1; - if (module->size > 0) { anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); @@ -613,7 +610,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, struct anv_pipeline_binding sampler_to_descriptor[256]; map = (struct anv_pipeline_bind_map) { - .surface_to_descriptor = surface_to_descriptor, + .surface_to_descriptor = surface_to_descriptor + 8, .sampler_to_descriptor = sampler_to_descriptor }; @@ -623,6 +620,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + unsigned num_rts = 0; + struct anv_pipeline_binding rt_bindings[8]; nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; nir_foreach_variable_safe(var, &nir->outputs) { if (var->data.location < FRAG_RESULT_DATA0) @@ -630,13 +629,52 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned rt = var->data.location - FRAG_RESULT_DATA0; if (rt >= key.nr_color_regions) { + /* Out-of-bounds, throw it away */ var->data.mode = nir_var_local; exec_node_remove(&var->node); exec_list_push_tail(&impl->locals, &var->node); + continue; + } + + /* Give it a new, compacted, location */ + var->data.location = FRAG_RESULT_DATA0 + num_rts; + + unsigned array_len = + glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; + assert(num_rts + array_len <= 8); + + for (unsigned i = 0; i < array_len; i++) { + rt_bindings[num_rts] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .offset = rt + i, + }; } + + num_rts += array_len; + } + + if (pipeline->use_repclear) { + assert(num_rts == 1); + key.nr_color_regions = 1; } - anv_fill_binding_table(&prog_data.base, MAX_RTS); + if (num_rts == 0) { + /* If we have no render targets, we need a null render target */ + rt_bindings[0] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .offset = UINT16_MAX, + }; + num_rts = 1; + } + + assert(num_rts <= 8); + map.surface_to_descriptor -= num_rts; + map.surface_count += num_rts; + assert(map.surface_count <= 256); + memcpy(map.surface_to_descriptor, rt_bindings, + num_rts * sizeof(*rt_bindings)); + + anv_fill_binding_table(&prog_data.base, num_rts); void *mem_ctx = ralloc_context(NULL); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7791bbc1649..f24ea20115b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -995,11 +995,16 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); +#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT16_MAX + struct anv_pipeline_binding { - /* The descriptor set this surface corresponds to */ + /* The descriptor set this surface corresponds to. The special value of + * ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS indicates that the offset refers + * to a color attachment and not a regular descriptor. + */ uint16_t set; - /* Offset into the descriptor set */ + /* Offset into the descriptor set or attachment list. 
*/ uint16_t offset; }; @@ -1404,9 +1409,11 @@ struct anv_pipeline_bind_map { uint32_t surface_count; uint32_t sampler_count; uint32_t image_count; + uint32_t attachment_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; + uint32_t * surface_to_attachment; }; struct anv_pipeline { diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 10dd6457fbc..71705d23200 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -114,9 +114,33 @@ emit_cb_state(struct anv_pipeline *pipeline, .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, }; + /* Default everything to disabled */ + for (uint32_t i = 0; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + + struct anv_pipeline_bind_map *map = + &pipeline->bindings[MESA_SHADER_FRAGMENT]; + bool has_writeable_rt = false; - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + for (unsigned i = 0; i < map->surface_count; i++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; + + /* All color attachments are at the beginning of the binding table */ + if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) + break; + + /* We can have at most 8 attachments */ + assert(i < 8); + + if (binding->offset >= info->attachmentCount) + continue; + + const VkPipelineColorBlendAttachmentState *a = + &info->pAttachments[binding->offset]; if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || a->dstColorBlendFactor != a->dstAlphaBlendFactor || @@ -165,13 +189,6 @@ emit_cb_state(struct anv_pipeline *pipeline, } } - for (uint32_t i = info->attachmentCount; i < 8; i++) { - blend_state.Entry[i].WriteDisableAlpha = true; - blend_state.Entry[i].WriteDisableRed = true; - blend_state.Entry[i].WriteDisableGreen = true; - blend_state.Entry[i].WriteDisableBlue = true; - } - if (info->attachmentCount > 0) { struct GENX(BLEND_STATE_ENTRY) *bs = &blend_state.Entry[0]; -- cgit v1.2.3 From dc504a51fb47d1b4a12011cb1986c3897fad007f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 10:25:00 -0800 Subject: anv/pipeline: Unconditionally emit PS_BLEND on gen8+ Special-casing the PS_BLEND packet wasn't really gaining us anything. It's defined to be more-or-less the contents of blend state entry 0 only without the indirection. We can just copy-and-paste the contents. If there are no valid color targets, then blend state 0 will be 0-initialized anyway so it's basically the same as the special case we had before. 
--- src/intel/vulkan/gen8_pipeline.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 71705d23200..b8b29d46b8a 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -189,24 +189,20 @@ emit_cb_state(struct anv_pipeline *pipeline, } } - if (info->attachmentCount > 0) { - struct GENX(BLEND_STATE_ENTRY) *bs = &blend_state.Entry[0]; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), - .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable, - .HasWriteableRT = has_writeable_rt, - .ColorBufferBlendEnable = bs->ColorBufferBlendEnable, - .SourceAlphaBlendFactor = bs->SourceAlphaBlendFactor, - .DestinationAlphaBlendFactor = - bs->DestinationAlphaBlendFactor, - .SourceBlendFactor = bs->SourceBlendFactor, - .DestinationBlendFactor = bs->DestinationBlendFactor, - .AlphaTestEnable = false, - .IndependentAlphaBlendEnable = - blend_state.IndependentAlphaBlendEnable); - } else { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND)); - } + struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0]; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), + .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable, + .HasWriteableRT = has_writeable_rt, + .ColorBufferBlendEnable = bs0->ColorBufferBlendEnable, + .SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor, + .DestinationAlphaBlendFactor = + bs0->DestinationAlphaBlendFactor, + .SourceBlendFactor = bs0->SourceBlendFactor, + .DestinationBlendFactor = bs0->DestinationBlendFactor, + .AlphaTestEnable = false, + .IndependentAlphaBlendEnable = + blend_state.IndependentAlphaBlendEnable); GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); if (!device->info.has_llc) -- cgit v1.2.3 From bbbdd32c192a350dd63f21cf0b01a30ee6a085ff Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 11:19:02 -0800 Subject: anv/meta_clear: Use repclear again --- src/intel/vulkan/anv_meta_clear.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index bce94460844..a24e59950be 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -297,14 +297,15 @@ create_color_pipeline(struct anv_device *device, .pAttachments = blend_attachment_state }; - /* Disable repclear because we do not want the compiler to replace the - * shader. We need the shader to write to the specified color attachment, - * but the repclear shader writes to all color attachments. + /* Use the repclear shader. Since the NIR shader we are providing has + * exactly one output, that output will get compacted down to binding + * table entry 0. The hard-coded repclear shader is then exactly what + * we want regardless of what attachment we are actually clearing. */ return create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &device->meta_state.alloc, - /*use_repclear*/ false, pipeline); + /*use_repclear*/ true, pipeline); } static void -- cgit v1.2.3 From 42b4c0fa6e0909e9622b03d56393ddec173ebe5d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 16:49:06 -0800 Subject: anv: Pull all of the genX_foo functions into anv_genX.h This way we only have to declare them each once and we get it for all gens at a single go. 
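The underlying mechanism is multiple inclusion of an unguarded header, with genX() redefined before each include. Boiled down to a toy example (decls.h and flush_state are illustrative names):

/* decls.h -- deliberately has no include guard; each inclusion declares
 * one generation's worth of functions, named by the current genX(). */
void genX(flush_state)(void);

/* consumer.c -- stamp out the declarations once per hardware gen */
#define genX(x) gen7_##x
#include "decls.h"
#undef genX

#define genX(x) gen8_##x
#include "decls.h"
#undef genX

/* After preprocessing, both gen7_flush_state() and gen8_flush_state()
 * are declared from a single list of prototypes. */

The gen-specific .c files, which are compiled with genX() already defined, include the same header once and get exactly their own generation's names.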
--- src/intel/vulkan/anv_genX.h | 61 ++++++++++++++++ src/intel/vulkan/anv_private.h | 159 +++++------------------------------------ 2 files changed, 79 insertions(+), 141 deletions(-) create mode 100644 src/intel/vulkan/anv_genX.h (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h new file mode 100644 index 00000000000..a8b96e48be5 --- /dev/null +++ b/src/intel/vulkan/anv_genX.h @@ -0,0 +1,61 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * Gen-specific function declarations. This header must *not* be included + * directly. Instead, it is included multiple times by anv_private.h. + * + * In this header file, the usual genx() macro is available.
+ */ + +VkResult genX(init_device_state)(struct anv_device *device); + +void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer); + +struct anv_state +genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer, + struct anv_framebuffer *fb); + +void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); + +void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); + +void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); + +VkResult +genX(graphics_pipeline_create)(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +genX(compute_pipeline_create)(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f24ea20115b..0ef840da10e 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -691,11 +691,6 @@ struct anv_device { pthread_mutex_t mutex; }; -VkResult gen7_init_device_state(struct anv_device *device); -VkResult gen75_init_device_state(struct anv_device *device); -VkResult gen8_init_device_state(struct anv_device *device); -VkResult gen9_init_device_state(struct anv_device *device); - void anv_device_get_cache_uuid(void *uuid); @@ -1294,55 +1289,14 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); -void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, const VkRenderPassBeginInfo *info); -struct anv_state -gen7_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); -struct anv_state -gen75_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); -struct anv_state -gen8_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); -struct anv_state -gen9_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer, - struct anv_framebuffer *fb); - -void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); -void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer 
*cmd_buffer); -void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); - struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage); @@ -1538,62 +1492,6 @@ anv_graphics_pipeline_create(VkDevice device, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); -VkResult -gen7_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen75_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen7_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen75_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - struct anv_format_swizzle { unsigned r:2; unsigned g:2; @@ -1747,32 +1645,6 @@ void anv_image_view_init(struct anv_image_view *view, uint32_t offset, VkImageUsageFlags usage_mask); -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen7_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen75_fill_image_surface_state(struct anv_device *device, void *state_map, - 
struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen8_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen9_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); - struct anv_buffer_view { enum isl_format format; /**< VkBufferViewCreateInfo::format */ struct anv_bo *bo; @@ -1794,19 +1666,6 @@ void anv_fill_buffer_surface_state(struct anv_device *device, uint32_t offset, uint32_t range, uint32_t stride); -void gen7_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen75_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen8_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen9_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); - void anv_image_view_fill_image_param(struct anv_device *device, struct anv_image_view *view, struct brw_image_param *param); @@ -1949,6 +1808,24 @@ ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) +/* Gen-specific function declarations */ +#ifdef genX +# include "anv_genX.h" +#else +# define genX(x) gen7_##x +# include "anv_genX.h" +# undef genX +# define genX(x) gen75_##x +# include "anv_genX.h" +# undef genX +# define genX(x) gen8_##x +# include "anv_genX.h" +# undef genX +# define genX(x) gen9_##x +# include "anv_genX.h" +# undef genX +#endif + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 28cbc45b3c83d645bb2b805a0ed6008e2f9dad61 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 16:54:07 -0800 Subject: anv/cmd_buffer: Split flush_state into two functions --- src/intel/vulkan/anv_genX.h | 1 + src/intel/vulkan/gen7_cmd_buffer.c | 11 ++++++++++- src/intel/vulkan/gen8_cmd_buffer.c | 11 ++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index a8b96e48be5..f98127ba238 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -42,6 +42,7 @@ void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 8dce586eec7..d552f1b4b8a 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -462,6 +462,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) } } + cmd_buffer->state.vb_dirty &= ~vb_emit; + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { /* If somebody compiled a pipeline after starting a command buffer the * scratch bo may have grown since we started this cmd buffer (and @@ -521,6 +523,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer 
*cmd_buffer) if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) gen7_cmd_buffer_emit_scissor(cmd_buffer); + genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); +} + +void +genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_RENDER_TARGETS | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | @@ -622,7 +632,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); } - cmd_buffer->state.vb_dirty &= ~vb_emit; cmd_buffer->state.dirty = 0; } diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 0d27c27f5b7..f1c82235d3d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -279,6 +279,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) } } + cmd_buffer->state.vb_dirty &= ~vb_emit; + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { /* If somebody compiled a pipeline after starting a command buffer the * scratch bo may have grown since we started this cmd buffer (and @@ -324,6 +326,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) gen7_cmd_buffer_emit_scissor(cmd_buffer); + genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); +} + +void +genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { __emit_sf_state(cmd_buffer); @@ -452,7 +462,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ); } - cmd_buffer->state.vb_dirty &= ~vb_emit; cmd_buffer->state.dirty = 0; } -- cgit v1.2.3 From 248ab61740c4082517424f5aa94b2f4e7b210d76 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 8 Mar 2016 17:10:05 -0800 Subject: anv/cmd_buffer: Pull the core of flush_state into genX_cmd_buffer --- src/intel/vulkan/anv_genX.h | 3 + src/intel/vulkan/gen7_cmd_buffer.c | 147 +------------------------------ src/intel/vulkan/gen8_cmd_buffer.c | 135 +--------------------------- src/intel/vulkan/genX_cmd_buffer.c | 176 +++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 276 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index f98127ba238..77d387ae748 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -41,6 +41,9 @@ void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, + bool enable_slm); + void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index d552f1b4b8a..56f03268133 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -32,44 +32,6 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - 
[MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage]); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - } - } - - cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; - - return flushed; -} - #if GEN_GEN == 7 && !GEN_IS_HASWELL void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, @@ -344,8 +306,8 @@ emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN7_L3CNTLREG2 0xb020 #define GEN7_L3CNTLREG3 0xb024 -static void -config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +void +genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) { /* References for GL state: * @@ -401,7 +363,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); bool needs_slm = cs_prog_data->base.total_shared > 0; - config_l3(cmd_buffer, needs_slm); + genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), @@ -423,109 +385,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_BUFFERS)); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GENX(VERTEX_BUFFER_STATE) state = { - .VertexBufferIndex = vb, - .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, - .VertexBufferMemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, - .InstanceDataStepRate = 1 - }; - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: - * - * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to - * the next 3DPRIMITIVE command after programming the - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" - * - * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of - * pipeline setup, we need to dirty push constants. - */ - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; - } - - if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || - cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { - /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, - * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, - * 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one - * PIPE_CONTROL needs to be sent before any combination of VS - * associated 3DSTATE." - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &cmd_buffer->device->workaround_bo, 0 }); - } - - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) { - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - } - - if (cmd_buffer->state.push_constants_dirty) - cmd_buffer_flush_push_constants(cmd_buffer); - - /* We use the gen8 state here because it only contains the additional - * min/max fields and, since they occur at the end of the packet and - * don't change the stride, they work on gen7 too. 
- */ - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); -} - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index f1c82235d3d..4a926255a5d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -32,46 +32,6 @@ #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage]); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, - .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - } - - flushed |= mesa_to_vk_shader_stage(stage); - } - - cmd_buffer->state.push_constants_dirty &= ~flushed; - - return flushed; -} - #if GEN_GEN == 8 static void emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, @@ -158,8 +118,8 @@ emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) #define GEN8_L3CNTLREG 0x7034 -static void -config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +void +genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) { /* References for GL state: * @@ -240,95 +200,6 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer) __emit_genx_sf_state(cmd_buffer); } -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - config_l3(cmd_buffer, false); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_BUFFERS)); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GENX(VERTEX_BUFFER_STATE) state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - cmd_buffer->state.vb_dirty &= 
~vb_emit; - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: - * - * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to - * the next 3DPRIMITIVE command after programming the - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" - * - * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of - * pipeline setup, we need to dirty push constants. - */ - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; - } - - /* We emit the binding tables and sampler tables first, then emit push - * constants and then finally emit binding table and sampler table - * pointers. It has to happen in this order, since emitting the binding - * tables may change the push constants (in case of storage images). After - * emitting push constants, on SKL+ we have to emit the corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. - */ - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->state.push_constants_dirty) - dirty |= cmd_buffer_flush_push_constants(cmd_buffer); - - if (dirty) - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); -} - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -573,7 +444,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); bool needs_slm = cs_prog_data->base.total_shared > 0; - config_l3(cmd_buffer, needs_slm); + genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { #if GEN_GEN < 10 diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b969fab35bc..c3d2043dcdf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -269,6 +269,182 @@ void genX(CmdPipelineBarrier)( } } +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + 
.ConstantBody = { +#if GEN_GEN >= 9 + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), +#else + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), +#endif + }); + } + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; + + return flushed; +} + +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + genX(cmd_buffer_config_l3)(cmd_buffer, false); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + +#if GEN_GEN >= 8 + .MemoryObjectControlState = GENX(MOCS), +#else + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .InstanceDataStepRate = 1, + .VertexBufferMemoryObjectControlState = GENX(MOCS), +#endif + + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + +#if GEN_GEN >= 8 + .BufferSize = buffer->size - offset +#else + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, +#endif + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. + */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; + } + +#if GEN_GEN <= 7 + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. 
Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } +#endif + + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. + */ + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) { +#if GEN_GEN >= 9 + /* On Sky Lake and later, the binding table pointers commands are + * what actually flush the changes to push constant state so we need + * to dirty them so they get re-emitted below. + */ + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); +#else + cmd_buffer_flush_push_constants(cmd_buffer); +#endif + } + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); +} + static void emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *bo, uint32_t offset) -- cgit v1.2.3 From 7ebbc3946ae9cffb3c3db522dcbe2f1041633164 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 4 Mar 2016 11:43:19 -0800 Subject: anv/meta: Minimize height of images used for copies In addition to demystifying the value being added to the height, this future-proofs the code for new tiling modes and keeps the image height as small as possible. v2: Actually use the smallest height possible. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta_blit.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index b8a42f99eec..ecd4d2d3536 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -450,8 +450,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .format = 0, /* TEMPLATE */ .extent = { .width = 0, /* TEMPLATE */ - /* Pad to highest tile height to compensate for a vertical intratile offset */ - .height = MIN(rects[r].height + 64, 1 << 14), + .height = 0, /* TEMPLATE */ .depth = 1, }, .mipLevels = 1, @@ -465,11 +464,19 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .isl_tiling_flags = 0, /* TEMPLATE */ }; + /* The image height is the rect height + src/dst y-offset from the + * tile-aligned base address. + */ + struct isl_tile_info tile_info; + anv_image_info.isl_tiling_flags = 1 << src->tiling; image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? 
VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; image_info.format = src_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, &tile_info); + image_info.extent.height = rects[r].height + + rects[r].src_y % tile_info.height; image_info.extent.width = src->pitch / src->bs; VkImage src_image; anv_image_create(vk_device, &anv_image_info, @@ -480,6 +487,9 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; image_info.format = dst_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, &tile_info); + image_info.extent.height = rects[r].height + + rects[r].dst_y % tile_info.height; image_info.extent.width = dst->pitch / dst->bs; VkImage dst_image; anv_image_create(vk_device, &anv_image_info, -- cgit v1.2.3 From ddbc6458464b86fa3f4f87f0f2db2f117fa04cdc Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 7 Mar 2016 14:18:27 -0800 Subject: anv/meta: Store src and dst usage flags in a variable Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta_blit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index ecd4d2d3536..82b79b88f2c 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -440,6 +440,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); VkFormat src_format = vk_format_for_size(src->bs); VkFormat dst_format = vk_format_for_size(dst->bs); + VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { @@ -472,7 +474,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, anv_image_info.isl_tiling_flags = 1 << src->tiling; image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.usage = src_usage; image_info.format = src_format, isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, &tile_info); image_info.extent.height = rects[r].height + @@ -485,7 +487,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, anv_image_info.isl_tiling_flags = 1 << dst->tiling; image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? 
VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image_info.usage = dst_usage; image_info.format = dst_format, isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, &tile_info); image_info.extent.height = rects[r].height + @@ -533,7 +535,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view src_iview; anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_SAMPLED_BIT); + &iview_info, cmd_buffer, img_o, src_usage); iview_info.image = dst_image; iview_info.format = dst_format; @@ -548,7 +550,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, (uint32_t*)&dst_offset_el.y); struct anv_image_view dst_iview; anv_image_view_init(&dst_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + &iview_info, cmd_buffer, img_o, dst_usage); /* Perform blit */ meta_emit_blit(cmd_buffer, -- cgit v1.2.3 From f39168392243d6dacefbc8708b764c5978ff24df Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 7 Mar 2016 22:38:05 -0800 Subject: anv/meta: Make meta_emit_blit() public This can be reverted if the only other consumer, anv_meta_blit2d(), uses a different method. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta.h | 11 +++++++++++ src/intel/vulkan/anv_meta_blit.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 587c044fa5f..fb562dbd564 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -105,6 +105,17 @@ void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); +void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter); #ifdef __cplusplus } #endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 82b79b88f2c..57833bf66ac 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -160,7 +160,7 @@ meta_region_extent_el(const VkFormat format, }; } -static void +void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, -- cgit v1.2.3 From 627728cce55b8b67bb30bdd206affb6f0885315b Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 7 Mar 2016 15:15:33 -0800 Subject: anv/meta: Split anv_meta_blit.c into three files The new organization is as follows: * anv_meta_blit.c: Blit and state setup/teardown commands * anv_meta_copy.c: Copy and update commands * anv_meta_blit2d.c: 2D Blitter API commands Also, change the formatting to contain most lines within 80 columns. 
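[Illustrative aside, not part of this patch: a minimal use of the 2D blitter API that this commit isolates into anv_meta_blit2d.c. The entry points and struct fields below are the ones this series defines; the helper name and the 64x64 RGBA8 dimensions are invented, and the sketch only compiles inside the driver tree against anv_meta.h. It mirrors do_buffer_copy() in the new anv_meta_copy.c further down.]

#include "anv_meta.h"

static void
example_copy_64x64_rgba8(struct anv_cmd_buffer *cmd_buffer,
                         struct anv_bo *src_bo, struct anv_bo *dst_bo)
{
   const int bs = 4;                       /* 4-byte texels, e.g. RGBA8 */

   struct anv_meta_blit2d_surf src = {
      .bo = src_bo,
      .tiling = ISL_TILING_LINEAR,
      .base_offset = 0,
      .bs = bs,
      .pitch = 64 * bs,
   };
   struct anv_meta_blit2d_surf dst = src;  /* same layout, different bo */
   dst.bo = dst_bo;

   struct anv_meta_blit2d_rect rect = {
      .width = 64,
      .height = 64,                        /* src_x/src_y/dst_x/dst_y stay 0 */
   };

   struct anv_meta_saved_state saved_state;
   anv_meta_begin_blit2d(cmd_buffer, &saved_state);
   anv_meta_blit2d(cmd_buffer, &src, &dst, 1, &rect);
   anv_meta_end_blit2d(cmd_buffer, &saved_state);
}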
Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/Makefile.am | 2 + src/intel/vulkan/anv_meta_blit.c | 612 +------------------------------------ src/intel/vulkan/anv_meta_blit2d.c | 213 +++++++++++++ src/intel/vulkan/anv_meta_copy.c | 441 ++++++++++++++++++++++++++ 4 files changed, 662 insertions(+), 606 deletions(-) create mode 100644 src/intel/vulkan/anv_meta_blit2d.c create mode 100644 src/intel/vulkan/anv_meta_copy.c (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 272db40d10b..f20cd41fbba 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -83,7 +83,9 @@ VULKAN_SOURCES = \ anv_intel.c \ anv_meta.c \ anv_meta_blit.c \ + anv_meta_blit2d.c \ anv_meta_clear.c \ + anv_meta_copy.c \ anv_meta_resolve.c \ anv_nir_apply_dynamic_offsets.c \ anv_nir_apply_pipeline_layout.c \ diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 57833bf66ac..7bddc6b2d42 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -119,47 +119,6 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } -void -anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *save) -{ - meta_prepare_blit(cmd_buffer, save); -} - - -/* Returns the user-provided VkBufferImageCopy::imageOffset in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkOffset3D -meta_region_offset_el(const struct anv_image * image, - const struct VkOffset3D * offset) -{ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - return (VkOffset3D) { - .x = offset->x / isl_layout->bw, - .y = offset->y / isl_layout->bh, - .z = offset->z / isl_layout->bd, - }; -} - -/* Returns the user-provided VkBufferImageCopy::imageExtent in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. 
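[A hedged worked example of the element conversion being moved here, assuming a BC1-style compressed layout with 4x4x1 blocks (bw = 4, bh = 4, bd = 1): an imageExtent of 17x10x1 texels becomes

   .width  = DIV_ROUND_UP(17, 4) = 5,
   .height = DIV_ROUND_UP(10, 4) = 3,
   .depth  = DIV_ROUND_UP(1, 1)  = 1,

i.e. one element per compressed block, while for uncompressed formats bw = bh = bd = 1 and the conversion is the identity.]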
- */ -static struct VkExtent3D -meta_region_extent_el(const VkFormat format, - const struct VkExtent3D * extent) -{ - const struct isl_format_layout * isl_layout = - anv_format_for_vk_format(format)->isl_layout; - return (VkExtent3D) { - .width = DIV_ROUND_UP(extent->width , isl_layout->bw), - .height = DIV_ROUND_UP(extent->height, isl_layout->bh), - .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; -} - void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, @@ -194,8 +153,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.x + src_extent.width) + / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) + / (float)src_iview->extent.height, (float)src_offset.z / (float)src_iview->extent.depth, }, }; @@ -207,7 +168,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, .tex_coord = { (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.y + src_extent.height) / + (float)src_iview->extent.height, (float)src_offset.z / (float)src_iview->extent.depth, }, }; @@ -380,444 +342,6 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, anv_meta_restore(saved_state, cmd_buffer); } -void -anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *save) -{ - meta_finish_blit(cmd_buffer, save); -} - -static VkFormat -vk_format_for_size(int bs) -{ - /* The choice of UNORM and UINT formats is very intentional here. Most of - * the time, we want to use a UINT format to avoid any rounding error in - * the blit. For stencil blits, R8_UINT is required by the hardware. - * (It's the only format allowed in conjunction with W-tiling.) Also we - * intentionally use the 4-channel formats whenever we can. This is so - * that, when we do a RGB <-> RGBX copy, the two formats will line up even - * though one of them is 3/4 the size of the other. The choice of UNORM - * vs. UINT is also very intentional because Haswell doesn't handle 8 or - * 16-bit RGB UINT formats at all so we have to use UNORM there. - * Fortunately, the only time we should ever use two different formats in - * the table below is for RGB -> RGBA blits and so we will never have any - * UNORM/UINT mismatch. 
- */ - switch (bs) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UNORM; - case 4: return VK_FORMAT_R8G8B8A8_UNORM; - case 6: return VK_FORMAT_R16G16B16_UNORM; - case 8: return VK_FORMAT_R16G16B16A16_UNORM; - case 12: return VK_FORMAT_R32G32B32_UINT; - case 16: return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format block size"); - } -} - -static struct anv_meta_blit2d_surf -blit_surf_for_image(const struct anv_image* image, - const struct isl_surf *img_isl_surf) -{ - return (struct anv_meta_blit2d_surf) { - .bo = image->bo, - .tiling = img_isl_surf->tiling, - .base_offset = image->offset, - .bs = isl_format_get_layout(img_isl_surf->format)->bs, - .pitch = isl_surf_get_row_pitch(img_isl_surf), - }; -} - -void -anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_blit2d_surf *src, - struct anv_meta_blit2d_surf *dst, - unsigned num_rects, - struct anv_meta_blit2d_rect *rects) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkFormat src_format = vk_format_for_size(src->bs); - VkFormat dst_format = vk_format_for_size(dst->bs); - VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; - VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - for (unsigned r = 0; r < num_rects; ++r) { - - /* Create VkImages */ - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = 0, /* TEMPLATE */ - .extent = { - .width = 0, /* TEMPLATE */ - .height = 0, /* TEMPLATE */ - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = 0, /* TEMPLATE */ - .usage = 0, /* TEMPLATE */ - }; - struct anv_image_create_info anv_image_info = { - .vk_info = &image_info, - .isl_tiling_flags = 0, /* TEMPLATE */ - }; - - /* The image height is the rect height + src/dst y-offset from the - * tile-aligned base address. - */ - struct isl_tile_info tile_info; - - anv_image_info.isl_tiling_flags = 1 << src->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = src_usage; - image_info.format = src_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, &tile_info); - image_info.extent.height = rects[r].height + - rects[r].src_y % tile_info.height; - image_info.extent.width = src->pitch / src->bs; - VkImage src_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &src_image); - - anv_image_info.isl_tiling_flags = 1 << dst->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == ISL_TILING_LINEAR_BIT ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = dst_usage; - image_info.format = dst_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, &tile_info); - image_info.extent.height = rects[r].height + - rects[r].dst_y % tile_info.height; - image_info.extent.width = dst->pitch / dst->bs; - VkImage dst_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &dst_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src->bo; - anv_image_from_handle(src_image)->offset = src->base_offset; - anv_image_from_handle(dst_image)->bo = dst->bo; - anv_image_from_handle(dst_image)->offset = dst->base_offset; - - /* Create VkImageViews */ - VkImageViewCreateInfo iview_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 0, /* TEMPLATE */ - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = 0, /* TEMPLATE */ - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }; - uint32_t img_o = 0; - - iview_info.image = src_image; - iview_info.format = src_format; - VkOffset3D src_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(src_image)-> - color_surface.isl, - rects[r].src_x, - rects[r].src_y, - &img_o, - (uint32_t*)&src_offset_el.x, - (uint32_t*)&src_offset_el.y); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, src_usage); - - iview_info.image = dst_image; - iview_info.format = dst_format; - VkOffset3D dst_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(dst_image)-> - color_surface.isl, - rects[r].dst_x, - rects[r].dst_y, - &img_o, - (uint32_t*)&dst_offset_el.x, - (uint32_t*)&dst_offset_el.y); - struct anv_image_view dst_iview; - anv_image_view_init(&dst_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, dst_usage); - - /* Perform blit */ - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - src_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, - anv_image_from_handle(dst_image), - &dst_iview, - dst_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); - } -} - -static void -do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, int bs) -{ - struct anv_meta_blit2d_surf b_src = { - .bo = src, - .tiling = ISL_TILING_LINEAR, - .base_offset = src_offset, - .bs = bs, - .pitch = width * bs, - }; - struct anv_meta_blit2d_surf b_dst = { - .bo = dest, - .tiling = ISL_TILING_LINEAR, - .base_offset = dest_offset, - .bs = bs, - .pitch = width * bs, - }; - struct anv_meta_blit2d_rect rect = { - .width = width, - .height = height, - }; - anv_meta_blit2d(cmd_buffer, - &b_src, - &b_dst, - 1, - &rect); -} - -void anv_CmdCopyBuffer( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - - struct anv_meta_saved_state saved_state; - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; - uint64_t copy_size = pRegions[r].size; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. 
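[Hedged standalone restatement, not part of the patch: the block-size selection this hunk removes (the logic moves to the new anv_meta_copy.c) picks the largest power-of-two format size, capped at 16 bytes, that divides the source offset, destination offset and copy size. The helper name, the OR-folding of the three ffs() checks into one, and the sample offsets are all invented; MIN2 is Mesa's usual macro, defined here so the sketch stands alone.]

#include <stdint.h>

#ifndef MIN2
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#endif

static int
copy_block_size(uint64_t src_offset, uint64_t dst_offset, uint64_t size)
{
   int bs = 16;
   /* The lowest set bit of the OR is the lowest set bit across all three
    * values, so one ffs covers the three separate checks in the hunk below.
    * e.g. src_offset = 0x38, dst_offset = 0x40, size = 0x100:
    * 0x38 | 0x40 | 0x100 = 0x178, lowest set bit = bit 3 -> bs = 8.
    */
   int fs = __builtin_ffsll(src_offset | dst_offset | size) - 1;
   if (fs != -1)
      bs = MIN2(bs, 1 << fs);
   return bs;
}

[The removed code below then slices the copy into 16384x16384-element rectangles (max_surface_dim = 1 << 14), one full-width rectangle of whole rows, and a final single row.]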
- */ - int bs = 16; - - int fs = ffs(src_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(src_offset % bs == 0); - - fs = ffs(dest_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(dest_offset % bs == 0); - - fs = ffs(pRegions[r].size) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(pRegions[r].size % bs == 0); - - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, bs); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dest_offset += max_copy_size; - } - - uint64_t height = copy_size / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * bs; - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, height, bs); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dest_offset += rect_copy_size; - } - - if (copy_size != 0) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - copy_size / bs, 1, bs); - } - } - - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - -void anv_CmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - /* We can't quite grab a full block because the state stream needs a - * little data at the top to build its linked list. - */ - const uint32_t max_update_size = - cmd_buffer->device->dynamic_state_block_pool.block_size - 64; - - assert(max_update_size < (1 << 14) * 4); - - while (dataSize) { - const uint32_t copy_size = MIN2(dataSize, max_update_size); - - struct anv_state tmp_data = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); - - memcpy(tmp_data.map, pData, copy_size); - - int bs; - if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - bs = 16; - } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - bs = 8; - } else { - assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - bs = 4; - } - - do_buffer_copy(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, - tmp_data.offset, - dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, bs); - - dataSize -= copy_size; - dstOffset += copy_size; - pData = (void *)pData + copy_size; - } - - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdCopyImage can be used to copy image data between multisample - * images, but both images must have the same number of samples. 
- */ - assert(src_image->samples == dest_image->samples); - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - assert(pRegions[r].srcSubresource.aspectMask == - pRegions[r].dstSubresource.aspectMask); - - VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - - /* Create blit surfaces */ - struct isl_surf *src_isl_surf = - &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl; - struct isl_surf *dst_isl_surf = - &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl; - struct anv_meta_blit2d_surf b_src = blit_surf_for_image(src_image, src_isl_surf); - struct anv_meta_blit2d_surf b_dst = blit_surf_for_image(dest_image, dst_isl_surf); - - /* Start creating blit rect */ - const VkOffset3D dst_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); - const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); - const VkExtent3D img_extent_el = meta_region_extent_el(src_image->vk_format, - &pRegions[r].extent); - struct anv_meta_blit2d_rect rect = { - .width = img_extent_el.width, - .height = img_extent_el.height, - }; - - /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].extent.depth; - unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - - /* Finish creating blit rect */ - isl_surf_get_image_offset_el(dst_isl_surf, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + slice_array, - pRegions[r].dstOffset.z + slice_3d, - &rect.dst_x, - &rect.dst_y); - isl_surf_get_image_offset_el(src_isl_surf, - pRegions[r].srcSubresource.mipLevel, - pRegions[r].srcSubresource.baseArrayLayer + slice_array, - pRegions[r].srcOffset.z + slice_3d, - &rect.src_x, - &rect.src_y); - rect.dst_x += dst_offset_el.x; - rect.dst_y += dst_offset_el.y; - rect.src_x += src_offset_el.x; - rect.src_y += src_offset_el.y; - - /* Perform Blit */ - anv_meta_blit2d(cmd_buffer, - &b_src, - &b_dst, - 1, - &rect); - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - } - - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - void anv_CmdBlitImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -925,130 +449,6 @@ void anv_CmdBlitImage( meta_finish_blit(cmd_buffer, &saved_state); } -static void -meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, - struct anv_buffer* buffer, - struct anv_image* image, - uint32_t regionCount, - const VkBufferImageCopy* pRegions, - bool forward) -{ - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." 
- */ - assert(image->samples == 1); - - anv_meta_begin_blit2d(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - - /* Start creating blit rect */ - const VkOffset3D img_offset_el = meta_region_offset_el(image, &pRegions[r].imageOffset); - const VkExtent3D bufferExtent = { - .width = pRegions[r].bufferRowLength, - .height = pRegions[r].bufferImageHeight, - }; - const VkExtent3D buf_extent_el = meta_region_extent_el(image->vk_format, &bufferExtent); - const VkExtent3D img_extent_el = meta_region_extent_el(image->vk_format, - &pRegions[r].imageExtent); - struct anv_meta_blit2d_rect rect = { - .width = MAX2(buf_extent_el.width, img_extent_el.width), - .height = MAX2(buf_extent_el.height, img_extent_el.height), - }; - - /* Create blit surfaces */ - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - const struct isl_surf *img_isl_surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - struct anv_meta_blit2d_surf img_bsurf = blit_surf_for_image(image, img_isl_surf); - struct anv_meta_blit2d_surf buf_bsurf = { - .bo = buffer->bo, - .tiling = ISL_TILING_LINEAR, - .base_offset = buffer->offset + pRegions[r].bufferOffset, - .bs = forward ? image->format->isl_layout->bs : img_bsurf.bs, - .pitch = rect.width * buf_bsurf.bs, - }; - - /* Set direction-dependent variables */ - struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf; - struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf; - uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x; - uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; - - /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - - /* Finish creating blit rect */ - isl_surf_get_image_offset_el(img_isl_surf, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - x_offset, - y_offset); - *x_offset += img_offset_el.x; - *y_offset += img_offset_el.y; - - /* Perform Blit */ - anv_meta_blit2d(cmd_buffer, - src_bsurf, - dst_bsurf, - 1, - &rect); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. 
- */ - buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs; - - if (image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - } - anv_meta_end_blit2d(cmd_buffer, &saved_state); -} - -void anv_CmdCopyBufferToImage( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - - meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, - regionCount, pRegions, true); -} - -void anv_CmdCopyImageToBuffer( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer); - - meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, - regionCount, pRegions, false); -} - void anv_device_finish_meta_blit_state(struct anv_device *device) { diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c new file mode 100644 index 00000000000..b165abd9b6c --- /dev/null +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -0,0 +1,213 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" + +static VkFormat +vk_format_for_size(int bs) +{ + /* The choice of UNORM and UINT formats is very intentional here. Most of + * the time, we want to use a UINT format to avoid any rounding error in + * the blit. For stencil blits, R8_UINT is required by the hardware. + * (It's the only format allowed in conjunction with W-tiling.) Also we + * intentionally use the 4-channel formats whenever we can. This is so + * that, when we do a RGB <-> RGBX copy, the two formats will line up even + * though one of them is 3/4 the size of the other. The choice of UNORM + * vs. UINT is also very intentional because Haswell doesn't handle 8 or + * 16-bit RGB UINT formats at all so we have to use UNORM there. + * Fortunately, the only time we should ever use two different formats in + * the table below is for RGB -> RGBA blits and so we will never have any + * UNORM/UINT mismatch. 
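[Concretely — an aside, not in the patch: for the RGB -> RGBA case the comment describes, the source and destination resolve to

   vk_format_for_size(3) == VK_FORMAT_R8G8B8_UNORM     /* 3-byte texels */
   vk_format_for_size(4) == VK_FORMAT_R8G8B8A8_UNORM   /* 4-byte texels */

and because both sides are UNORM the 3- and 4-channel texels line up, which a UNORM/UINT mix would not guarantee.]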
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UNORM; + case 4: return VK_FORMAT_R8G8B8A8_UNORM; + case 6: return VK_FORMAT_R16G16B16_UNORM; + case 8: return VK_FORMAT_R16G16B16A16_UNORM; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + anv_meta_restore(save, cmd_buffer); +} + +void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + anv_meta_save(save, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + VkFormat src_format = vk_format_for_size(src->bs); + VkFormat dst_format = vk_format_for_size(dst->bs); + VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; + VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + for (unsigned r = 0; r < num_rects; ++r) { + + /* Create VkImages */ + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = 0, /* TEMPLATE */ + .extent = { + .width = 0, /* TEMPLATE */ + .height = 0, /* TEMPLATE */ + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = 0, /* TEMPLATE */ + .usage = 0, /* TEMPLATE */ + }; + struct anv_image_create_info anv_image_info = { + .vk_info = &image_info, + .isl_tiling_flags = 0, /* TEMPLATE */ + }; + + /* The image height is the rect height + src/dst y-offset from the + * tile-aligned base address. + */ + struct isl_tile_info tile_info; + + anv_image_info.isl_tiling_flags = 1 << src->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == + ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = src_usage; + image_info.format = src_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, + &tile_info); + image_info.extent.height = rects[r].height + + rects[r].src_y % tile_info.height; + image_info.extent.width = src->pitch / src->bs; + VkImage src_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &src_image); + + anv_image_info.isl_tiling_flags = 1 << dst->tiling; + image_info.tiling = anv_image_info.isl_tiling_flags == + ISL_TILING_LINEAR_BIT ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + image_info.usage = dst_usage; + image_info.format = dst_format, + isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, + &tile_info); + image_info.extent.height = rects[r].height + + rects[r].dst_y % tile_info.height; + image_info.extent.width = dst->pitch / dst->bs; + VkImage dst_image; + anv_image_create(vk_device, &anv_image_info, + &cmd_buffer->pool->alloc, &dst_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
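[Backing up to the extent.height computation a few lines above — a hedged worked example with invented numbers, assuming isl_tiling_get_info() reports a 32-row Y-tile: for rects[r].src_y = 100 and rects[r].height = 64, image_info.extent.height = 64 + 100 % 32 = 68 rows, where the fixed pad this series replaced earlier ("rects[r].height + 64") would have allocated 128.]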
+ */ + anv_image_from_handle(src_image)->bo = src->bo; + anv_image_from_handle(src_image)->offset = src->base_offset; + anv_image_from_handle(dst_image)->bo = dst->bo; + anv_image_from_handle(dst_image)->offset = dst->base_offset; + + /* Create VkImageViews */ + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = 0, /* TEMPLATE */ + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = 0, /* TEMPLATE */ + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }; + uint32_t img_o = 0; + + iview_info.image = src_image; + iview_info.format = src_format; + VkOffset3D src_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(src_image)-> + color_surface.isl, + rects[r].src_x, + rects[r].src_y, + &img_o, + (uint32_t*)&src_offset_el.x, + (uint32_t*)&src_offset_el.y); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, src_usage); + + iview_info.image = dst_image; + iview_info.format = dst_format; + VkOffset3D dst_offset_el = {0}; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(dst_image)-> + color_surface.isl, + rects[r].dst_x, + rects[r].dst_y, + &img_o, + (uint32_t*)&dst_offset_el.x, + (uint32_t*)&dst_offset_el.y); + struct anv_image_view dst_iview; + anv_image_view_init(&dst_iview, cmd_buffer->device, + &iview_info, cmd_buffer, img_o, dst_usage); + + /* Perform blit */ + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + src_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + anv_image_from_handle(dst_image), + &dst_iview, + dst_offset_el, + (VkExtent3D){rects[r].width, rects[r].height, 1}, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); + } +} + diff --git a/src/intel/vulkan/anv_meta_copy.c b/src/intel/vulkan/anv_meta_copy.c new file mode 100644 index 00000000000..1a2bfd6cf01 --- /dev/null +++ b/src/intel/vulkan/anv_meta_copy.c @@ -0,0 +1,441 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_meta.h" + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D *extent) +{ + const struct isl_format_layout *isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image *image, + const struct VkOffset3D *offset) +{ + const struct isl_format_layout *isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +static struct anv_meta_blit2d_surf +blit_surf_for_image(const struct anv_image* image, + const struct isl_surf *img_isl_surf) +{ + return (struct anv_meta_blit2d_surf) { + .bo = image->bo, + .tiling = img_isl_surf->tiling, + .base_offset = image->offset, + .bs = isl_format_get_layout(img_isl_surf->format)->bs, + .pitch = isl_surf_get_row_pitch(img_isl_surf), + }; +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, int bs) +{ + struct anv_meta_blit2d_surf b_src = { + .bo = src, + .tiling = ISL_TILING_LINEAR, + .base_offset = src_offset, + .bs = bs, + .pitch = width * bs, + }; + struct anv_meta_blit2d_surf b_dst = { + .bo = dest, + .tiling = ISL_TILING_LINEAR, + .base_offset = dest_offset, + .bs = bs, + .pitch = width * bs, + }; + struct anv_meta_blit2d_rect rect = { + .width = width, + .height = height, + }; + anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect); +} + +static void +meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_buffer* buffer, + struct anv_image* image, + uint32_t regionCount, + const VkBufferImageCopy* pRegions, + bool forward) +{ + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." 
+ */ + assert(image->samples == 1); + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + + /* Start creating blit rect */ + const VkOffset3D img_offset_el = + meta_region_offset_el(image, &pRegions[r].imageOffset); + const VkExtent3D bufferExtent = { + .width = pRegions[r].bufferRowLength, + .height = pRegions[r].bufferImageHeight, + }; + const VkExtent3D buf_extent_el = + meta_region_extent_el(image->vk_format, &bufferExtent); + const VkExtent3D img_extent_el = + meta_region_extent_el(image->vk_format, &pRegions[r].imageExtent); + struct anv_meta_blit2d_rect rect = { + .width = MAX2(buf_extent_el.width, img_extent_el.width), + .height = MAX2(buf_extent_el.height, img_extent_el.height), + }; + + /* Create blit surfaces */ + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + const struct isl_surf *img_isl_surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + struct anv_meta_blit2d_surf img_bsurf = + blit_surf_for_image(image, img_isl_surf); + struct anv_meta_blit2d_surf buf_bsurf = { + .bo = buffer->bo, + .tiling = ISL_TILING_LINEAR, + .base_offset = buffer->offset + pRegions[r].bufferOffset, + .bs = forward ? image->format->isl_layout->bs : img_bsurf.bs, + .pitch = rect.width * buf_bsurf.bs, + }; + + /* Set direction-dependent variables */ + struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf; + struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf; + uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x; + uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; + + /* Loop through each 3D or array slice */ + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(img_isl_surf, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + + slice_array, + pRegions[r].imageOffset.z + slice_3d, + x_offset, + y_offset); + *x_offset += img_offset_el.x; + *y_offset += img_offset_el.y; + + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, src_bsurf, dst_bsurf, 1, &rect); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. 
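The slice walk above is worth pausing on: a single counter pair serves both 3D images and array images. A minimal standalone sketch of its shape, with illustrative names rather than driver code; it relies on the Vulkan rule that 3D images copy with layerCount == 1 and 1D/2D images with extent.depth == 1, so exactly one counter ever advances:

#include <stdbool.h>
#include <stdint.h>

static void
walk_slices(bool is_3d, uint32_t depth, uint32_t layer_count)
{
   /* Exactly one of the two counters moves per iteration, so the loop
    * runs max(depth, layer_count) times with the other counter pinned
    * at zero.
    */
   uint32_t slice_3d = 0, slice_array = 0;
   while (slice_3d < depth && slice_array < layer_count) {
      /* ...emit one 2D blit for slice (slice_3d, slice_array)... */
      if (is_3d)
         slice_3d++;
      else
         slice_array++;
   }
}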
+ */ + buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs; + + if (image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + } + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + + meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, + regionCount, pRegions, true); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer); + + meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, + regionCount, pRegions, false); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. + */ + assert(src_image->samples == dest_image->samples); + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + /* Create blit surfaces */ + struct isl_surf *src_isl_surf = + &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl; + struct isl_surf *dst_isl_surf = + &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl; + struct anv_meta_blit2d_surf b_src = + blit_surf_for_image(src_image, src_isl_surf); + struct anv_meta_blit2d_surf b_dst = + blit_surf_for_image(dest_image, dst_isl_surf); + + /* Start creating blit rect */ + const VkOffset3D dst_offset_el = + meta_region_offset_el(dest_image, &pRegions[r].dstOffset); + const VkOffset3D src_offset_el = + meta_region_offset_el(src_image, &pRegions[r].srcOffset); + const VkExtent3D img_extent_el = + meta_region_extent_el(src_image->vk_format, &pRegions[r].extent); + struct anv_meta_blit2d_rect rect = { + .width = img_extent_el.width, + .height = img_extent_el.height, + }; + + /* Loop through each 3D or array slice */ + unsigned num_slices_3d = pRegions[r].extent.depth; + unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + + /* Finish creating blit rect */ + isl_surf_get_image_offset_el(dst_isl_surf, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer + + slice_array, + pRegions[r].dstOffset.z + slice_3d, + &rect.dst_x, + &rect.dst_y); + isl_surf_get_image_offset_el(src_isl_surf, + 
pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer + + slice_array, + pRegions[r].srcOffset.z + slice_3d, + &rect.src_x, + &rect.src_y); + rect.dst_x += dst_offset_el.x; + rect.dst_y += dst_offset_el.y; + rect.src_x += src_offset_el.x; + rect.src_y += src_offset_el.y; + + /* Perform Blit */ + anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect); + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + } + + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, bs); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, bs); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, bs); + } + } + + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + anv_meta_begin_blit2d(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. 
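The ffs() computation in anv_CmdCopyBuffer above deserves a worked example. It selects the largest power-of-two "texel" size, at most 16 bytes (the R32G32B32A32 case of vk_format_for_size), that divides the source offset, the destination offset, and the copy size. A standalone sketch; the sample values 48, 256, and 100 are ours, not the patch's:

#include <assert.h>
#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

static int
largest_common_block_size(uint64_t src_offset, uint64_t dst_offset,
                          uint64_t size)
{
   /* __builtin_ffsll(x) - 1 is the index of the lowest set bit, so
    * 1 << (ffsll(x) - 1) is the largest power of two dividing x; clamp
    * to 16 bytes, the widest format the copy path uses.
    */
   int bs = 16;
   const uint64_t vals[] = { src_offset, dst_offset, size };
   for (unsigned i = 0; i < 3; i++) {
      int fs = __builtin_ffsll(vals[i]) - 1;
      if (fs != -1)
         bs = MIN2(bs, 1 << fs);
   }
   return bs;
}

static void
worked_example(void)
{
   /* 48 and 256 are both divisible by 16, but a 100-byte size is only
    * divisible by 4, so this copy moves 25 four-byte texels.
    */
   assert(largest_common_block_size(48, 256, 100) == 4);
}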
+ */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, bs); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } + + anv_meta_end_blit2d(cmd_buffer, &saved_state); +} -- cgit v1.2.3 From 514c0557178b0325c59a28d68b0f250f0eeaddf5 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 8 Mar 2016 12:45:55 -0800 Subject: anv/meta: Prefix anv_ to meta_emit_blit() Follow the convention for non-static functions. Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta.h | 2 +- src/intel/vulkan/anv_meta_blit.c | 4 ++-- src/intel/vulkan/anv_meta_blit2d.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index fb562dbd564..e2e00433c49 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -106,7 +106,7 @@ anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 7bddc6b2d42..2c3c91733a7 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -120,7 +120,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, } void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, @@ -438,7 +438,7 @@ void anv_CmdBlitImage( }, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - meta_emit_blit(cmd_buffer, + anv_meta_emit_blit(cmd_buffer, src_image, &src_iview, pRegions[r].srcOffsets[0], src_extent, dest_image, &dest_iview, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index b165abd9b6c..d49b4708f8e 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -195,7 +195,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, &iview_info, cmd_buffer, img_o, dst_usage); /* Perform blit */ - meta_emit_blit(cmd_buffer, + anv_meta_emit_blit(cmd_buffer, anv_image_from_handle(src_image), &src_iview, src_offset_el, -- cgit v1.2.3 From 7fbbad01706f08645e832e6dd2f5eeaf1e3c6894 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 8 Mar 2016 09:37:43 -0800 Subject: anv/blit2d: Use the tiling enum for simplicity Signed-off-by: Nanley Chery Reviewed-by: Anuj Phogat --- src/intel/vulkan/anv_meta_blit2d.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c 
b/src/intel/vulkan/anv_meta_blit2d.c index d49b4708f8e..6f0734254bf 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -110,8 +110,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct isl_tile_info tile_info; anv_image_info.isl_tiling_flags = 1 << src->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == - ISL_TILING_LINEAR_BIT ? + image_info.tiling = src->tiling == ISL_TILING_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = src_usage; image_info.format = src_format, @@ -125,8 +124,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, &cmd_buffer->pool->alloc, &src_image); anv_image_info.isl_tiling_flags = 1 << dst->tiling; - image_info.tiling = anv_image_info.isl_tiling_flags == - ISL_TILING_LINEAR_BIT ? + image_info.tiling = dst->tiling == ISL_TILING_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; image_info.usage = dst_usage; image_info.format = dst_format, -- cgit v1.2.3 From 132f079a8cbaeab442a7ea8b0f02b9f07dfdd310 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Mar 2016 11:24:46 -0800 Subject: anv/gem: Use C99-style struct initializers for DRM structs This is more consistent with the way the rest of the driver works and ensures that all structs we pass into the kernel are zero'd out except for the fields we actually want to fill. We were previously doing this when building with valgrind to keep valgrind from complaining. However, we need to start doing this unconditionally as recent kernels have been getting touchier about this. In particular, as of kernel commit b31e51360e88 from Chris Wilson, context creation and destruction fail if the padding bits are not set to 0. --- src/intel/vulkan/anv_gem.c | 175 ++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 99 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c index 0a7be353327..a886f7c2cb0 100644 --- a/src/intel/vulkan/anv_gem.c +++ b/src/intel/vulkan/anv_gem.c @@ -32,8 +32,6 @@ #include "anv_private.h" -#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) - static int anv_ioctl(int fd, unsigned long request, void *arg) { @@ -54,13 +52,11 @@ anv_ioctl(int fd, unsigned long request, void *arg) uint32_t anv_gem_create(struct anv_device *device, size_t size) { - struct drm_i915_gem_create gem_create; - int ret; - - VG_CLEAR(gem_create); - gem_create.size = size; + struct drm_i915_gem_create gem_create = { + .size = size, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); if (ret != 0) { /* FIXME: What do we do if this fails?
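The rewrite this commit applies across anv_gem.c follows a single pattern, sketched below on a made-up struct. struct drm_example is hypothetical, for illustration only; the real i915 structs differ. C99 guarantees that any named member left out of a designated initializer, explicit pad fields included, is zero-initialized, and the i915 ioctl structs keep their padding as explicit pad members, which is what the kernel-compatibility argument above relies on:

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for a DRM ioctl argument struct. */
struct drm_example {
   uint32_t handle;
   uint32_t pad;    /* kernel requires this to be zero */
   uint64_t size;
};

/* Before: clear by hand; the old VG_CLEAR only did this under Valgrind
 * builds, leaving the struct uninitialized otherwise.
 */
static void
old_style(struct drm_example *arg, uint32_t handle, uint64_t size)
{
   memset(arg, 0, sizeof(*arg));
   arg->handle = handle;
   arg->size = size;
}

/* After: members not named in the designated initializer, .pad included,
 * are zero-initialized unconditionally.
 */
static struct drm_example
new_style(uint32_t handle, uint64_t size)
{
   return (struct drm_example) {
      .handle = handle,
      .size = size,
   };
}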
*/ return 0; @@ -72,10 +68,10 @@ anv_gem_create(struct anv_device *device, size_t size) void anv_gem_close(struct anv_device *device, uint32_t gem_handle) { - struct drm_gem_close close; + struct drm_gem_close close = { + .handle = gem_handle, + }; - VG_CLEAR(close); - close.handle = gem_handle; anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); } @@ -86,17 +82,14 @@ void* anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags) { - struct drm_i915_gem_mmap gem_mmap; - int ret; - - gem_mmap.handle = gem_handle; - VG_CLEAR(gem_mmap.pad); - gem_mmap.offset = offset; - gem_mmap.size = size; - VG_CLEAR(gem_mmap.addr_ptr); - gem_mmap.flags = flags; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + struct drm_i915_gem_mmap gem_mmap = { + .handle = gem_handle, + .offset = offset, + .size = size, + .flags = flags, + }; + + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); if (ret != 0) { /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ return NULL; @@ -119,15 +112,13 @@ anv_gem_munmap(void *p, uint64_t size) uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size) { - struct drm_i915_gem_userptr userptr; - int ret; - - VG_CLEAR(userptr); - userptr.user_ptr = (__u64)((unsigned long) mem); - userptr.user_size = size; - userptr.flags = 0; + struct drm_i915_gem_userptr userptr = { + .user_ptr = (__u64)((unsigned long) mem), + .user_size = size, + .flags = 0, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); if (ret == -1) return 0; @@ -138,11 +129,10 @@ int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching) { - struct drm_i915_gem_caching gem_caching; - - VG_CLEAR(gem_caching); - gem_caching.handle = gem_handle; - gem_caching.caching = caching; + struct drm_i915_gem_caching gem_caching = { + .handle = gem_handle, + .caching = caching, + }; return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); } @@ -151,12 +141,11 @@ int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, uint32_t read_domains, uint32_t write_domain) { - struct drm_i915_gem_set_domain gem_set_domain; - - VG_CLEAR(gem_set_domain); - gem_set_domain.handle = gem_handle; - gem_set_domain.read_domains = read_domains; - gem_set_domain.write_domain = write_domain; + struct drm_i915_gem_set_domain gem_set_domain = { + .handle = gem_handle, + .read_domains = read_domains, + .write_domain = write_domain, + }; return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); } @@ -167,15 +156,13 @@ anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) { - struct drm_i915_gem_wait wait; - int ret; + struct drm_i915_gem_wait wait = { + .bo_handle = gem_handle, + .timeout_ns = *timeout_ns, + .flags = 0, + }; - VG_CLEAR(wait); - wait.bo_handle = gem_handle; - wait.timeout_ns = *timeout_ns; - wait.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); *timeout_ns = wait.timeout_ns; return ret; @@ -192,18 +179,17 @@ int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, uint32_t stride, uint32_t tiling) { - struct drm_i915_gem_set_tiling set_tiling; int ret; /* set_tiling overwrites the input on the error path, so we have to open * code anv_ioctl. 
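For context, the body of anv_ioctl() is elided by the hunk context above; it is presumably a restartable wrapper along these lines (a sketch, not the actual implementation):

#include <errno.h>
#include <sys/ioctl.h>

/* Retry the ioctl as long as it is interrupted by a signal or told to
 * try again, so callers never see transient EINTR/EAGAIN failures.
 */
static int
restartable_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;
   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   return ret;
}

set_tiling cannot go through such a wrapper precisely because the kernel may rewrite the argument struct on the error path, which is why the open-coded retry loops here rebuild the struct before each attempt.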
*/ - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = stride; + struct drm_i915_gem_set_tiling set_tiling = { + .handle = gem_handle, + .tiling_mode = tiling, + .stride = stride, + }; ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); @@ -214,13 +200,14 @@ anv_gem_set_tiling(struct anv_device *device, int anv_gem_get_param(int fd, uint32_t param) { - drm_i915_getparam_t gp; - int ret, tmp; + int tmp; + + drm_i915_getparam_t gp = { + .param = param, + .value = &tmp, + }; - VG_CLEAR(gp); - gp.param = param; - gp.value = &tmp; - ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + int ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); if (ret == 0) return tmp; @@ -233,9 +220,9 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) struct drm_gem_close close; int ret; - struct drm_i915_gem_create gem_create; - VG_CLEAR(gem_create); - gem_create.size = 4096; + struct drm_i915_gem_create gem_create = { + .size = 4096, + }; if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { assert(!"Failed to create GEM BO"); @@ -247,12 +234,12 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) /* set_tiling overwrites the input on the error path, so we have to open * code anv_ioctl. */ - struct drm_i915_gem_set_tiling set_tiling; do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_create.handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = tiling == I915_TILING_X ? 512 : 128; + struct drm_i915_gem_set_tiling set_tiling = { + .handle = gem_create.handle, + .tiling_mode = tiling, + .stride = tiling == I915_TILING_X ? 512 : 128, + }; ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); @@ -262,9 +249,9 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) goto close_and_return; } - struct drm_i915_gem_get_tiling get_tiling; - VG_CLEAR(get_tiling); - get_tiling.handle = gem_create.handle; + struct drm_i915_gem_get_tiling get_tiling = { + .handle = gem_create.handle, + }; if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { assert(!"Failed to get BO tiling"); @@ -275,7 +262,7 @@ anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) close_and_return: - VG_CLEAR(close); + memset(&close, 0, sizeof(close)); close.handle = gem_create.handle; anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); @@ -285,12 +272,9 @@ close_and_return: int anv_gem_create_context(struct anv_device *device) { - struct drm_i915_gem_context_create create; - int ret; - - VG_CLEAR(create); + struct drm_i915_gem_context_create create = { 0 }; - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); if (ret == -1) return -1; @@ -300,10 +284,9 @@ anv_gem_create_context(struct anv_device *device) int anv_gem_destroy_context(struct anv_device *device, int context) { - struct drm_i915_gem_context_destroy destroy; - - VG_CLEAR(destroy); - destroy.ctx_id = context; + struct drm_i915_gem_context_destroy destroy = { + .ctx_id = context, + }; return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); } @@ -311,11 +294,9 @@ anv_gem_destroy_context(struct anv_device *device, int context) int anv_gem_get_aperture(int fd, uint64_t *size) { - struct drm_i915_gem_get_aperture aperture; - int ret; + struct drm_i915_gem_get_aperture aperture = { 0 }; - VG_CLEAR(aperture); - ret = anv_ioctl(fd, 
DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + int ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); if (ret == -1) return -1; @@ -327,14 +308,12 @@ anv_gem_get_aperture(int fd, uint64_t *size) int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) { - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.handle = gem_handle; - args.flags = DRM_CLOEXEC; + struct drm_prime_handle args = { + .handle = gem_handle, + .flags = DRM_CLOEXEC, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); if (ret == -1) return -1; @@ -344,13 +323,11 @@ anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd) { - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.fd = fd; + struct drm_prime_handle args = { + .fd = fd, + }; - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); if (ret == -1) return 0; -- cgit v1.2.3 From 41a147904a95ba699358e8209e91d535f853ba61 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Mar 2016 18:35:00 -0800 Subject: anv/wsi: Throttle rendering to no more than 2 frames ahead Right now, Vulkan apps can pretty easily DOS the GPU by simply submitting a lot of batches. This commit makes us wait until the rendering for earlier frames is complete before continuing. By waiting 2 frames out, we can still keep the pipe reasonably full but without taking the entire system down. This is similar to what the GL driver does today. --- src/intel/vulkan/anv_wsi.c | 38 ++++++++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_wsi.h | 4 ++++ 2 files changed, 42 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index c5911a3635b..c2938f3836f 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -132,6 +132,14 @@ VkResult anv_CreateSwapchainKHR( if (result != VK_SUCCESS) return result; + if (pAllocator) + swapchain->alloc = *pAllocator; + else + swapchain->alloc = device->alloc; + + for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) + swapchain->fences[i] = VK_NULL_HANDLE; + *pSwapchain = anv_swapchain_to_handle(swapchain); return VK_SUCCESS; @@ -144,6 +152,11 @@ void anv_DestroySwapchainKHR( { ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) { + if (swapchain->fences[i] != VK_NULL_HANDLE) + anv_DestroyFence(device, swapchain->fences[i], pAllocator); + } + swapchain->destroy(swapchain, pAllocator); } @@ -185,11 +198,36 @@ VkResult anv_QueuePresentKHR( assert(swapchain->device == queue->device); + if (swapchain->fences[0] == VK_NULL_HANDLE) { + result = anv_CreateFence(anv_device_to_handle(queue->device), + &(VkFenceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, &swapchain->alloc, &swapchain->fences[0]); + if (result != VK_SUCCESS) + return result; + } else { + anv_ResetFences(anv_device_to_handle(queue->device), + 1, &swapchain->fences[0]); + } + + anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]); + result = swapchain->queue_present(swapchain, queue, pPresentInfo->pImageIndices[i]); /* TODO: What if one of them returns OUT_OF_DATE?
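The throttling mechanism itself is compact enough to restate. A standalone sketch of the three-slot rotation that follows, where fence_t and wait_for_fence are stand-ins for VkFence and anv_WaitForFences: slot 0 holds the fence just submitted for frame N; after rotating, last is the fence of frame N-2, and blocking on it keeps at most two frames of GPU work queued.

#include <stddef.h>

typedef void *fence_t;                         /* stand-in for VkFence */
static void wait_for_fence(fence_t f) { (void)f; }

static void
throttle_two_frames(fence_t fences[3])
{
   fence_t last = fences[2];   /* frame N-2's fence */
   fences[2] = fences[1];
   fences[1] = fences[0];
   fences[0] = last;           /* recycled: reset and reused for frame N+1 */

   if (last != NULL)
      wait_for_fence(last);    /* block until frame N-2 is done */
}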
*/ if (result != VK_SUCCESS) return result; + + VkFence last = swapchain->fences[2]; + swapchain->fences[2] = swapchain->fences[1]; + swapchain->fences[1] = swapchain->fences[0]; + swapchain->fences[0] = last; + + if (last != VK_NULL_HANDLE) { + anv_WaitForFences(anv_device_to_handle(queue->device), + 1, &last, true, 1); + } } return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_wsi.h b/src/intel/vulkan/anv_wsi.h index 6e9ff9b8447..bf17f033173 100644 --- a/src/intel/vulkan/anv_wsi.h +++ b/src/intel/vulkan/anv_wsi.h @@ -53,6 +53,10 @@ struct anv_wsi_interface { struct anv_swapchain { struct anv_device *device; + VkAllocationCallbacks alloc; + + VkFence fences[3]; + VkResult (*destroy)(struct anv_swapchain *swapchain, const VkAllocationCallbacks *pAllocator); VkResult (*get_images)(struct anv_swapchain *swapchain, -- cgit v1.2.3 From e920b184e9a0cd3864b1db95921ef5d8b2227c6a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Mar 2016 12:28:32 -0800 Subject: anv/x11: Split image creation into a helper function This lets us clean up error handling and make it correct. --- src/intel/vulkan/anv_wsi_x11.c | 250 +++++++++++++++++++++++------------------ 1 file changed, 142 insertions(+), 108 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index a63cb6e7c5b..aa7e6ebe3e7 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -520,27 +520,138 @@ x11_queue_present(struct anv_swapchain *anv_chain, } static VkResult -x11_swapchain_destroy(struct anv_swapchain *anv_chain, - const VkAllocationCallbacks *pAllocator) +x11_image_init(struct anv_device *device, struct x11_swapchain *chain, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct x11_image *image) { - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - xcb_void_cookie_t cookie; + VkResult result; + + VkImage image_h; + result = anv_image_create(anv_device_to_handle(device), + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + NULL, + &image_h); + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->image->format)); + + VkDeviceMemory memory_h; + result = anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); + if (result != VK_SUCCESS) + goto fail_create_image; + + image->memory = anv_device_memory_from_handle(memory_h); + image->memory->bo.is_winsys_bo = true; + + anv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0); + + struct anv_surface *surface = &image->image->color_surface; + assert(surface->isl.tiling == ISL_TILING_X); + + int ret = anv_gem_set_tiling(device, image->memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. 
*/ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); + goto fail_alloc_memory; + } - for (uint32_t i = 0; i < chain->image_count; i++) { - struct x11_image *image = &chain->images[i]; + int fd = anv_gem_handle_to_fd(device, image->memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); + goto fail_alloc_memory; + } - if (image->busy) - xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + uint32_t bpp = 32; + uint32_t depth = 24; + image->pixmap = xcb_generate_id(chain->conn); - cookie = xcb_free_pixmap(chain->conn, image->pixmap); - xcb_discard_reply(chain->conn, cookie.sequence); + xcb_void_cookie_t cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + image->pixmap, + chain->window, + image->image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->isl.row_pitch, + depth, bpp, fd); - anv_DestroyImage(anv_device_to_handle(chain->base.device), - anv_image_to_handle(image->image), pAllocator); + image->busy = false; - anv_FreeMemory(anv_device_to_handle(chain->base.device), - anv_device_memory_to_handle(image->memory), pAllocator); - } + xcb_discard_reply(chain->conn, cookie.sequence); + + return VK_SUCCESS; + +fail_alloc_memory: + anv_FreeMemory(anv_device_to_handle(chain->base.device), + anv_device_memory_to_handle(image->memory), pAllocator); + +fail_create_image: + anv_DestroyImage(anv_device_to_handle(chain->base.device), + anv_image_to_handle(image->image), pAllocator); + + return result; +} + +static void +x11_image_finish(struct x11_swapchain *chain, + const VkAllocationCallbacks* pAllocator, + struct x11_image *image) +{ + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + xcb_void_cookie_t cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + anv_DestroyImage(anv_device_to_handle(chain->base.device), + anv_image_to_handle(image->image), pAllocator); + + anv_FreeMemory(anv_device_to_handle(chain->base.device), + anv_device_memory_to_handle(image->memory), pAllocator); +} + +static VkResult +x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) + x11_image_finish(chain, pAllocator, &chain->images[i]); anv_free2(&chain->base.device->alloc, pAllocator, chain); @@ -581,102 +692,11 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->image_count = num_images; chain->next_image = 0; - for (uint32_t i = 0; i < chain->image_count; i++) { - VkDeviceMemory memory_h; - VkImage image_h; - struct anv_image *image; - struct anv_surface *surface; - struct anv_device_memory *memory; - - anv_image_create(anv_device_to_handle(device), - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - NULL, - &image_h); - - image = 
anv_image_from_handle(image_h); - assert(anv_format_is_color(image->format)); - - surface = &image->color_surface; - - anv_AllocateMemory(anv_device_to_handle(device), - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - NULL /* XXX: pAllocator */, - &memory_h); - - memory = anv_device_memory_from_handle(memory_h); - memory->bo.is_winsys_bo = true; - - anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), - memory_h, 0); - - int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->isl.row_pitch, I915_TILING_X); - if (ret) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "set_tiling failed: %m"); - goto fail; - } - - int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); - if (fd == -1) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "handle_to_fd failed: %m"); - goto fail; - } - - uint32_t bpp = 32; - uint32_t depth = 24; - xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); - - cookie = - xcb_dri3_pixmap_from_buffer_checked(chain->conn, - pixmap, - chain->window, - image->size, - pCreateInfo->imageExtent.width, - pCreateInfo->imageExtent.height, - surface->isl.row_pitch, - depth, bpp, fd); - - chain->images[i].image = image; - chain->images[i].memory = memory; - chain->images[i].pixmap = pixmap; - chain->images[i].busy = false; - - xcb_discard_reply(chain->conn, cookie.sequence); - } - chain->gc = xcb_generate_id(chain->conn); if (!chain->gc) { /* FINISHME: Choose a better error. */ result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; + goto fail_alloc; } cookie = xcb_create_gc(chain->conn, @@ -686,11 +706,25 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, (uint32_t []) { 0 }); xcb_discard_reply(chain->conn, cookie.sequence); + uint32_t image = 0; + for (; image < chain->image_count; image++) { + result = x11_image_init(device, chain, pCreateInfo, pAllocator, + &chain->images[image]); + if (result != VK_SUCCESS) + goto fail_init_images; + } + *swapchain_out = &chain->base; return VK_SUCCESS; - fail: +fail_init_images: + for (uint32_t j = 0; j < image; j++) + x11_image_finish(chain, pAllocator, &chain->images[j]); + +fail_alloc: + anv_free2(&device->alloc, pAllocator, chain); + return result; } -- cgit v1.2.3 From 9bff5266beb9bacf86b199ca5ecaafaac8fae948 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Kristensen Date: Thu, 10 Mar 2016 21:47:56 -0800 Subject: anv/x11: Add present support The old DRI3 implementation just used CopyArea instead of present. We still don't support all the MST fanciness, but it should at least avoid some copies and allow for.
v2 (Jason Ekstrand): - Better object cleanup and destruction - Handle the CONFIGURE_NOTIFY event and return OUT_OF_DATE when needed - Track dirtiness via IDLE_NOTIFY rather than iterating through the images sequentially --- src/intel/vulkan/Makefile.am | 3 +- src/intel/vulkan/anv_wsi_x11.c | 188 ++++++++++++++++++++++++++++++++--------- 2 files changed, 148 insertions(+), 43 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index f20cd41fbba..acf84e55871 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -165,7 +165,8 @@ anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) CLEANFILES = $(BUILT_SOURCES) -libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ +libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) \ + -lxcb -lxcb-dri3 -lxcb-present -lxcb-sync -lxshmfence \ $(top_builddir)/src/intel/isl/libisl.la \ $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ $(top_builddir)/src/mesa/libmesa.la \ diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index aa7e6ebe3e7..f9f72be154b 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include #include #include #include @@ -420,8 +421,9 @@ struct x11_image { struct anv_image * image; struct anv_device_memory * memory; xcb_pixmap_t pixmap; - xcb_get_geometry_cookie_t geom_cookie; bool busy; + struct xshmfence * shm_fence; + uint32_t sync_fence; }; struct x11_swapchain { @@ -432,7 +434,12 @@ struct x11_swapchain { xcb_gc_t gc; VkExtent2D extent; uint32_t image_count; - uint32_t next_image; + + xcb_present_event_t event_id; + xcb_special_event_t * special_event; + uint64_t send_sbc; + uint32_t stamp; + struct x11_image images[0]; }; @@ -456,6 +463,42 @@ x11_get_images(struct anv_swapchain *anv_chain, return VK_SUCCESS; } +static VkResult +x11_handle_dri3_present_event(struct x11_swapchain *chain, + xcb_present_generic_event_t *event) +{ + switch (event->evtype) { + case XCB_PRESENT_CONFIGURE_NOTIFY: { + xcb_present_configure_notify_event_t *config = (void *) event; + + if (config->width != chain->extent.width || + config->height != chain->extent.height) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + break; + } + + case XCB_PRESENT_EVENT_IDLE_NOTIFY: { + xcb_present_idle_notify_event_t *idle = (void *) event; + + for (unsigned i = 0; i < chain->image_count; i++) { + if (chain->images[i].pixmap == idle->pixmap) { + chain->images[i].busy = false; + break; + } + } + + break; + } + + case XCB_PRESENT_COMPLETE_NOTIFY: + default: + break; + } + + return VK_SUCCESS; +} + static VkResult x11_acquire_next_image(struct anv_swapchain *anv_chain, uint64_t timeout, @@ -463,30 +506,28 @@ x11_acquire_next_image(struct anv_swapchain *anv_chain, uint32_t *image_index) { struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - struct x11_image *image = &chain->images[chain->next_image]; - - if (image->busy) { - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = - xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); - if (!geom) { - free(err); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + while (1) { + for (uint32_t i = 0; i < chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + xshmfence_await(chain->images[i].shm_fence); + *image_index = i; + return VK_SUCCESS; + } } - if (geom->width != chain->extent.width || - geom->height != chain->extent.height) { - free(geom); +
xcb_flush(chain->conn); + xcb_generic_event_t *event = + xcb_wait_for_special_event(chain->conn, chain->special_event); + if (!event) return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - free(geom); - image->busy = false; + VkResult result = x11_handle_dri3_present_event(chain, (void *)event); + free(event); + if (result != VK_SUCCESS) + return result; } - - *image_index = chain->next_image; - chain->next_image = (chain->next_image + 1) % chain->image_count; - return VK_SUCCESS; } static VkResult @@ -499,19 +540,31 @@ x11_queue_present(struct anv_swapchain *anv_chain, assert(image_index < chain->image_count); - xcb_void_cookie_t cookie; + uint32_t options = XCB_PRESENT_OPTION_NONE; - cookie = xcb_copy_area(chain->conn, - image->pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); + int64_t target_msc = 0; + int64_t divisor = 0; + int64_t remainder = 0; + + options |= XCB_PRESENT_OPTION_ASYNC; - image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + xcb_void_cookie_t cookie = + xcb_present_pixmap(chain->conn, + chain->window, + image->pixmap, + (uint32_t) chain->send_sbc, + 0, /* valid */ + 0, /* update */ + 0, /* x_off */ + 0, /* y_off */ + XCB_NONE, /* target_crtc */ + XCB_NONE, + image->sync_fence, + options, + target_msc, + divisor, + remainder, 0, NULL); + xcb_discard_reply(chain->conn, cookie.sequence); image->busy = true; xcb_flush(chain->conn); @@ -525,6 +578,7 @@ x11_image_init(struct anv_device *device, struct x11_swapchain *chain, const VkAllocationCallbacks* pAllocator, struct x11_image *image) { + xcb_void_cookie_t cookie; VkResult result; VkImage image_h; @@ -599,7 +653,7 @@ x11_image_init(struct anv_device *device, struct x11_swapchain *chain, uint32_t depth = 24; image->pixmap = xcb_generate_id(chain->conn); - xcb_void_cookie_t cookie = + cookie = xcb_dri3_pixmap_from_buffer_checked(chain->conn, image->pixmap, chain->window, @@ -608,13 +662,35 @@ x11_image_init(struct anv_device *device, struct x11_swapchain *chain, pCreateInfo->imageExtent.height, surface->isl.row_pitch, depth, bpp, fd); + xcb_discard_reply(chain->conn, cookie.sequence); - image->busy = false; + int fence_fd = xshmfence_alloc_shm(); + if (fence_fd < 0) + goto fail_pixmap; - xcb_discard_reply(chain->conn, cookie.sequence); + image->shm_fence = xshmfence_map_shm(fence_fd); + if (image->shm_fence == NULL) + goto fail_shmfence_alloc; + + image->sync_fence = xcb_generate_id(chain->conn); + xcb_dri3_fence_from_fd(chain->conn, + image->pixmap, + image->sync_fence, + false, + fence_fd); + + image->busy = false; + xshmfence_trigger(image->shm_fence); return VK_SUCCESS; +fail_shmfence_alloc: + close(fence_fd); + +fail_pixmap: + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + fail_alloc_memory: anv_FreeMemory(anv_device_to_handle(chain->base.device), anv_device_memory_to_handle(image->memory), pAllocator); @@ -631,10 +707,13 @@ x11_image_finish(struct x11_swapchain *chain, const VkAllocationCallbacks* pAllocator, struct x11_image *image) { - if (image->busy) - xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + xcb_void_cookie_t cookie; + + cookie = xcb_sync_destroy_fence(chain->conn, image->sync_fence); + xcb_discard_reply(chain->conn, cookie.sequence); + xshmfence_unmap_shm(image->shm_fence); - xcb_void_cookie_t cookie = xcb_free_pixmap(chain->conn, image->pixmap); + cookie = xcb_free_pixmap(chain->conn, image->pixmap); 
xcb_discard_reply(chain->conn, cookie.sequence); anv_DestroyImage(anv_device_to_handle(chain->base.device), @@ -653,6 +732,8 @@ x11_swapchain_destroy(struct anv_swapchain *anv_chain, for (uint32_t i = 0; i < chain->image_count; i++) x11_image_finish(chain, pAllocator, &chain->images[i]); + xcb_unregister_for_special_event(chain->conn, chain->special_event); + anv_free2(&chain->base.device->alloc, pAllocator, chain); return VK_SUCCESS; @@ -670,9 +751,18 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, xcb_void_cookie_t cookie; VkResult result; + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + int num_images = pCreateInfo->minImageCount; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) + num_images = MAX2(num_images, 4); size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = anv_alloc2(&device->alloc, pAllocator, size, 8, @@ -690,13 +780,25 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->window = surface->window; chain->extent = pCreateInfo->imageExtent; chain->image_count = num_images; - chain->next_image = 0; + + chain->event_id = xcb_generate_id(chain->conn); + xcb_present_select_input(chain->conn, chain->event_id, chain->window, + XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY | + XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY | + XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); + + /* Create an XCB event queue to hold present events outside of the usual + * application event queue + */ + chain->special_event = + xcb_register_for_special_xge(chain->conn, &xcb_present_id, + chain->event_id, NULL); chain->gc = xcb_generate_id(chain->conn); if (!chain->gc) { /* FINISHME: Choose a better error. */ result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_alloc; + goto fail_register; } cookie = xcb_create_gc(chain->conn, @@ -722,7 +824,9 @@ fail_init_images: for (uint32_t j = 0; j < image; j++) x11_image_finish(chain, pAllocator, &chain->images[j]); -fail_alloc: +fail_register: + xcb_unregister_for_special_event(chain->conn, chain->special_event); + anv_free2(&device->alloc, pAllocator, chain); return result; -- cgit v1.2.3 From 753ebe4457444e13124eba1e2e2e07edc9ab3c09 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 Mar 2016 17:22:44 -0800 Subject: anv/x11: Reset the SHM fence before presenting the pixmap This seems to fix the flicker issue that I was seeing with dota2 --- src/intel/vulkan/anv_wsi_x11.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index f9f72be154b..9ef02961a93 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -548,6 +548,8 @@ x11_queue_present(struct anv_swapchain *anv_chain, options |= XCB_PRESENT_OPTION_ASYNC; + xshmfence_reset(image->shm_fence); + xcb_void_cookie_t cookie = xcb_present_pixmap(chain->conn, chain->window, -- cgit v1.2.3 From 41af9b2e517dd0c17e519490ca915b96f6898390 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 12 Mar 2016 08:54:41 -0800 Subject: HACK: Don't re-configure L3$ in render stages pre-BDW This fixes a "regression" on Haswell and prior caused by merging the gen7 and gen8 flush_state functions. 
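The FIXME in the hunk below spells out the intended long-term fix: query the kernel's command parser version at runtime instead of compiling the L3 reconfiguration out. A hedged sketch of what that check could look like; the version threshold and the plumbing are our assumptions, not the patch's, though I915_PARAM_CMD_PARSER_VERSION is the real i915 query for it:

#include <stdbool.h>

#ifndef I915_PARAM_CMD_PARSER_VERSION
#define I915_PARAM_CMD_PARSER_VERSION 28
#endif

/* Gate L3 reconfiguration at runtime rather than compile time. */
static bool
can_reconfigure_l3(int gen, int cmd_parser_version)
{
   if (gen >= 8)
      return true;   /* no command parser restrictions in the way */

   /* Haswell and prior need the register whitelist that shipped with
    * kernel 4.4's command parser; the exact version number here is an
    * assumed threshold for illustration.
    */
   return cmd_parser_version >= 5;
}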
Haswell should still work just fine if you're on a 4.4 kernel, but we really should make it detect the command parser version and do something intelligent. --- src/intel/vulkan/genX_cmd_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c3d2043dcdf..723f6d81a39 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -324,7 +324,18 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); +#if GEN_GEN >= 8 + /* FIXME (jason): Currently, the config_l3 function causes problems on + * Haswell and prior if you have a kernel older than 4.4. In order to + * work, it requires a couple of registers be white-listed in the + * command parser and they weren't added until 4.4. What we should do + * is check the command parser version and make it a no-op if your + * command parser is either off or too old. Compute won't work 100%, + * but at least 3-D will. In the mean time, I'm going to make this + * gen8+ only so that we can get Haswell working again. + */ genX(cmd_buffer_config_l3)(cmd_buffer, false); +#endif genX(flush_pipeline_select_3d)(cmd_buffer); -- cgit v1.2.3 From 1b126305ded36f6b416ada08e29ff84faeafef99 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 10 Mar 2016 17:16:58 -0800 Subject: anv/genX: Add flush_pipeline_select_gpgpu Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_genX.h | 1 + src/intel/vulkan/gen7_cmd_buffer.c | 6 +----- src/intel/vulkan/gen8_cmd_buffer.c | 23 +---------------------- src/intel/vulkan/genX_cmd_buffer.c | 27 +++++++++++++++++++++++++++ 4 files changed, 30 insertions(+), 27 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 77d387ae748..908a9e0efa9 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -40,6 +40,7 @@ void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); +void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 56f03268133..dbf05d06d0f 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -365,11 +365,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) bool needs_slm = cs_prog_data->base.total_shared > 0; genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); - if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } + genX(flush_pipeline_select_gpgpu)(cmd_buffer); if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 4a926255a5d..87b5e340772 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -446,28 +446,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) bool needs_slm = cs_prog_data->base.total_shared > 0; genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm); - if 
(cmd_buffer->state.current_pipeline != GPGPU) { -#if GEN_GEN < 10 - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gen9 - * hardware too. - */ - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_CC_STATE_POINTERS)); -#endif - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if GEN_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } + genX(flush_pipeline_select_gpgpu)(cmd_buffer); if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 723f6d81a39..d0a80f53e63 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -777,6 +777,33 @@ genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) } } +void +genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.current_pipeline != GPGPU) { +#if GEN_GEN >= 8 && GEN_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. + */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if GEN_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } +} + struct anv_state genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer, struct anv_framebuffer *fb) -- cgit v1.2.3 From c8ec65a1f5a85dbef3210dc49684fcfed49b7ea2 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 10 Mar 2016 17:25:45 -0800 Subject: anv: Add flush_pipeline_before_pipeline_select flush_pipeline_before_pipeline_select adds workarounds required before switching the pipeline. Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index d0a80f53e63..1ce53a81f1b 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -764,10 +764,31 @@ void genX(CmdDispatchIndirect)( anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH)); } +static void +flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, + uint32_t pipeline) +{ +#if GEN_GEN >= 8 && GEN_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. 
+ */ + if (pipeline == GPGPU) + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS)); +#endif +} + void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.current_pipeline != _3D) { + flush_pipeline_before_pipeline_select(cmd_buffer, _3D); + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if GEN_GEN >= 9 .MaskBits = 3, @@ -781,19 +802,7 @@ void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) { if (cmd_buffer->state.current_pipeline != GPGPU) { -#if GEN_GEN >= 8 && GEN_GEN < 10 - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gen9 - * hardware too. - */ - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_CC_STATE_POINTERS)); -#endif + flush_pipeline_before_pipeline_select(cmd_buffer, GPGPU); anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), #if GEN_GEN >= 9 -- cgit v1.2.3 From b83785d86d2c7f07323920615c72a9f09695a9a7 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 10 Mar 2016 17:19:13 -0800 Subject: anv/gen7: Add stall and flushes before switching pipelines This is a port of 18c76551ee425b981efefc61f663a7781df17882 from OpenGL to Vulkan. Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1ce53a81f1b..1b53f85419b 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -780,6 +780,30 @@ flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, */ if (pipeline == GPGPU) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS)); +#elif GEN_GEN <= 7 + /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] + * PIPELINE_SELECT [DevBWR+]": + * + * Project: DEVSNB+ + * + * Software must ensure all the write caches are flushed through a + * stalling PIPE_CONTROL command followed by another PIPE_CONTROL + * command to invalidate read only caches prior to programming + * MI_PIPELINE_SELECT command to change the Pipeline Select Mode. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .RenderTargetCacheFlushEnable = true, + .DepthCacheFlushEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .StateCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .PostSyncOperation = NoWrite); #endif } -- cgit v1.2.3 From abaa3bed22ebb580724a5741bb8bee69e476a85f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Mar 2016 15:24:24 -0700 Subject: anv/device: Flush the fence batch rather than the start of the BO --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 768e2eb3be1..bcd7a9e3c0d 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1409,7 +1409,7 @@ VkResult anv_CreateFence( assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); assert(batch.next - batch.start <= CACHELINE_SIZE); __builtin_ia32_mfence(); - __builtin_ia32_clflush(fence->bo.map); + __builtin_ia32_clflush(batch.start); } fence->exec2_objects[0].handle = fence->bo.gem_handle; -- cgit v1.2.3 From 2d8c6321177a92f6f1383adc1e75dde1610cfc64 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 8 Mar 2016 14:12:55 -0800 Subject: anv/blit2d: Copy anv_meta_blit.c functions These will be customized for blit2d operations. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 3 + src/intel/vulkan/anv_meta_blit2d.c | 597 +++++++++++++++++++++++++++++++++++++ 2 files changed, 600 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index e2e00433c49..98888aea87f 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -53,6 +53,9 @@ void anv_device_finish_meta_resolve_state(struct anv_device *device); VkResult anv_device_init_meta_blit_state(struct anv_device *device); void anv_device_finish_meta_blit_state(struct anv_device *device); +VkResult anv_device_init_meta_blit2d_state(struct anv_device *device); +void anv_device_finish_meta_blit2d_state(struct anv_device *device); + void anv_meta_save(struct anv_meta_saved_state *state, const struct anv_cmd_buffer *cmd_buffer, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 6f0734254bf..2d447ebe3a8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -22,6 +22,7 @@ */ #include "anv_meta.h" +#include "nir/nir_builder.h" static VkFormat vk_format_for_size(int bs) @@ -53,6 +54,222 @@ vk_format_for_size(int bs) } } +static void +meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, 
sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) + / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) + / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / + (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(anv_device_to_handle(device), + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + 
.renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_DestroyDescriptorPool(anv_device_to_handle(device), + desc_pool, &cmd_buffer->pool->alloc); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -209,3 +426,383 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, } } + +static nir_shader * +build_nir_vertex_shader(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_tex_pos"); + tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_tex_pos"); + tex_pos_out->data.location = VARYING_SLOT_VAR0; + tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, tex_pos_out, tex_pos_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. 
+ */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(vec4)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +void +anv_device_finish_meta_blit2d_state(struct anv_device *device) +{ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} + +VkResult +anv_device_init_meta_blit2d_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for blitting, but instead build and pass + * the VUEs directly to the rasterization backend. 
However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + 
ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} -- cgit v1.2.3 From 997a873f0c67108acf5733538c4475285e3f9b2e Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 9 Mar 2016 11:31:49 -0800 Subject: anv/blit2d: Customize meta blit structs and functions for blit2d API * Add fields in meta struct * Add support in meta init/teardown * Switch to custom meta_emit_blit2d() Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.c | 7 +++ src/intel/vulkan/anv_meta_blit2d.c | 114 ++++++++----------------------------- src/intel/vulkan/anv_private.h | 10 ++++ 3 files changed, 41 insertions(+), 90 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c index 82944ea1a92..1ac0306e031 100644 --- a/src/intel/vulkan/anv_meta.c +++ b/src/intel/vulkan/anv_meta.c @@ -150,8 +150,14 @@ anv_device_init_meta(struct anv_device *device) if (result != VK_SUCCESS) goto fail_blit; + result = anv_device_init_meta_blit2d_state(device); + if (result != VK_SUCCESS) + goto fail_blit2d; + return VK_SUCCESS; +fail_blit2d: + anv_device_finish_meta_blit_state(device); fail_blit: anv_device_finish_meta_resolve_state(device); fail_resolve: @@ -166,4 +172,5 @@ anv_device_finish_meta(struct anv_device *device) anv_device_finish_meta_resolve_state(device); anv_device_finish_meta_clear_state(device); anv_device_finish_meta_blit_state(device); + anv_device_finish_meta_blit2d_state(device); } diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 2d447ebe3a8..78d4b04d5d8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -56,15 +56,12 @@ vk_format_for_size(int bs) static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, VkExtent3D src_extent, - struct anv_image *dest_image, struct anv_image_view *dest_iview, VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter) + VkExtent3D dest_extent) { struct anv_device *device = cmd_buffer->device; @@ -73,8 +70,6 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, float tex_coord[3]; } *vb_data; - assert(src_image->samples == dest_image->samples); - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); struct anv_state vb_state = @@ -144,8 +139,8 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CreateSampler)(anv_device_to_handle(device), &(VkSamplerCreateInfo) { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = blit_filter, - .minFilter = blit_filter, 
+ .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, }, &cmd_buffer->pool->alloc, &sampler); VkDescriptorPool desc_pool; @@ -170,7 +165,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = desc_pool, .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout + .pSetLayouts = &device->meta_state.blit2d.ds_layout }, &set); anv_UpdateDescriptorSets(anv_device_to_handle(device), @@ -209,7 +204,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit.render_pass, + .renderPass = device->meta_state.blit2d.render_pass, .framebuffer = fb, .renderArea = { .offset = { dest_offset.x, dest_offset.y }, @@ -219,21 +214,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .pClearValues = NULL, }, VK_SUBPASS_CONTENTS_INLINE); - VkPipeline pipeline; - - switch (src_image->type) { - case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.pipeline_1d_src; - break; - case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.pipeline_2d_src; - break; - case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.pipeline_3d_src; - break; - default: - unreachable(!"bad VkImageType"); - } + VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), @@ -252,7 +233,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, 0, 1, + device->meta_state.blit2d.pipeline_layout, 0, 1, &set, 0, NULL); ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); @@ -410,16 +391,13 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, &iview_info, cmd_buffer, img_o, dst_usage); /* Perform blit */ - anv_meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), + meta_emit_blit2d(cmd_buffer, &src_iview, src_offset_el, (VkExtent3D){rects[r].width, rects[r].height, 1}, - anv_image_from_handle(dst_image), &dst_iview, dst_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, - VK_FILTER_NEAREST); + (VkExtent3D){rects[r].width, rects[r].height, 1}); anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); @@ -511,22 +489,16 @@ void anv_device_finish_meta_blit2d_state(struct anv_device *device) { anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, + device->meta_state.blit2d.render_pass, &device->meta_state.alloc); anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src, + device->meta_state.blit2d.pipeline_2d_src, &device->meta_state.alloc); anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, + device->meta_state.blit2d.pipeline_layout, &device->meta_state.alloc); anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, + device->meta_state.blit2d.ds_layout, 
&device->meta_state.alloc); } @@ -564,7 +536,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pPreserveAttachments = (uint32_t[]) { 0 }, }, .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); if (result != VK_SUCCESS) goto fail; @@ -577,18 +549,10 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .nir = build_nir_vertex_shader(), }; - struct anv_shader_module fs_1d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), - }; - struct anv_shader_module fs_2d = { .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), }; - struct anv_shader_module fs_3d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), - }; - VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 2, @@ -646,7 +610,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, &device->meta_state.alloc, - &device->meta_state.blit.ds_layout); + &device->meta_state.blit2d.ds_layout); if (result != VK_SUCCESS) goto fail_render_pass; @@ -654,9 +618,9 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, + .pSetLayouts = &device->meta_state.blit2d.ds_layout, }, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); if (result != VK_SUCCESS) goto fail_descriptor_set_layout; @@ -731,8 +695,8 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - .renderPass = device->meta_state.blit.render_pass, + .layout = device->meta_state.blit2d.pipeline_layout, + .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; @@ -745,64 +709,34 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .use_rectlist = true }; - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_1d; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); if (result != VK_SUCCESS) - goto fail_pipeline_2d; + goto fail_pipeline_layout; ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); return VK_SUCCESS; - fail_pipeline_2d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - 
&device->meta_state.alloc); - - fail_pipeline_1d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - fail_pipeline_layout: anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, + device->meta_state.blit2d.pipeline_layout, &device->meta_state.alloc); fail_descriptor_set_layout: anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, + device->meta_state.blit2d.ds_layout, &device->meta_state.alloc); fail_render_pass: anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, + device->meta_state.blit2d.render_pass, &device->meta_state.alloc); ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); fail: return result; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0ef840da10e..939cd087377 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -607,6 +607,16 @@ struct anv_meta_state { VkDescriptorSetLayout ds_layout; } blit; + struct { + VkRenderPass render_pass; + + /** Pipeline that copies from a 2D image. */ + VkPipeline pipeline_2d_src; + + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit2d; + struct { /** Pipeline [i] resolves an image with 2^(i+1) samples. */ VkPipeline pipelines[MAX_SAMPLES_LOG2]; -- cgit v1.2.3 From 1a0c63b8804812081d660642539bb411dc560992 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 11 Mar 2016 16:25:02 -0800 Subject: Revert "anv/meta: Prefix anv_ to meta_emit_blit()" This reverts commit 514c0557178b0325c59a28d68b0f250f0eeaddf5. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 2 +- src/intel/vulkan/anv_meta_blit.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 98888aea87f..5616252b5a4 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -109,7 +109,7 @@ anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); void -anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 2c3c91733a7..7bddc6b2d42 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -120,7 +120,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, } void -anv_meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, VkOffset3D src_offset, @@ -438,7 +438,7 @@ void anv_CmdBlitImage( }, cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - anv_meta_emit_blit(cmd_buffer, + meta_emit_blit(cmd_buffer, src_image, &src_iview, pRegions[r].srcOffsets[0], src_extent, dest_image, &dest_iview, -- cgit v1.2.3 From b487acc489afc277a7611f14e7319bc7340e7777 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 11 Mar 2016 16:26:28 -0800 Subject: Revert "anv/meta: Make meta_emit_blit() public" This reverts commit f39168392243d6dacefbc8708b764c5978ff24df. Some conflicts had to be resolved in order for this revert to be successful. 
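The error paths above follow the driver's reverse-order goto-unwind idiom: each object gains a cleanup label once it has been created, a failure jumps to the label for the most recently created object, and the labels fall through so everything below the failure point is torn down. A minimal sketch of the pattern, with hypothetical create_*/destroy_* helpers standing in for the anv_Create*/anv_Destroy* calls:

static VkResult
init_meta_state(struct demo_device *device)
{
   VkResult result;

   result = create_render_pass(device);
   if (result != VK_SUCCESS)
      goto fail;

   result = create_ds_layout(device);
   if (result != VK_SUCCESS)
      goto fail_render_pass;

   result = create_pipeline(device);
   if (result != VK_SUCCESS)
      goto fail_ds_layout;

   return VK_SUCCESS;

 fail_ds_layout:
   destroy_ds_layout(device);      /* unwind in reverse creation order */
 fail_render_pass:
   destroy_render_pass(device);
 fail:
   return result;
}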
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta.h | 11 ----------- src/intel/vulkan/anv_meta_blit.c | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h index 5616252b5a4..6bd5c1dfba2 100644 --- a/src/intel/vulkan/anv_meta.h +++ b/src/intel/vulkan/anv_meta.h @@ -108,17 +108,6 @@ void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save); -void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - VkExtent3D src_extent, - struct anv_image *dest_image, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter); #ifdef __cplusplus } #endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 7bddc6b2d42..09e2dbb2918 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -119,7 +119,7 @@ meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } -void +static void meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_image *src_image, struct anv_image_view *src_iview, -- cgit v1.2.3 From f8f98869157b678320ab8b8fcd50ab7285dac0be Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 10 Mar 2016 16:06:14 -0800 Subject: anv/blit2d: Use texel fetch in frag shader The texelFetch operation requires that the sampled texture coordinates be unnormalized integers. This will simplify the copy shader for w-tiled images (stencil buffers). v2 (Jason): Use f2i for texel coords Fix num_components indirectly Use float inputs for interpolation Nest tex_pos functions Suggested-by: Jason Ekstrand Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 43 ++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 78d4b04d5d8..839ab02c904 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -83,11 +83,9 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)(src_offset.x + src_extent.width) - / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) - / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, + src_offset.x + src_extent.width, + src_offset.y + src_extent.height, + src_offset.z, }, }; @@ -97,10 +95,9 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, dest_offset.y + dest_extent.height, }, .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / - (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, + src_offset.x, + src_offset.y + src_extent.height, + src_offset.z, }, }; @@ -110,9 +107,9 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, dest_offset.y, }, .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)src_offset.y / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, + src_offset.x, + src_offset.y, + src_offset.z, }, }; @@ -438,22 +435,16 @@ static nir_shader * build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) { const struct glsl_type *vec4 = 
glsl_vec4_type(); + const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "v_tex_pos"); + vec3, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; - - /* Swizzle the array index which comes in as Z coordinate into the right - * position. - */ - unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; - nir_ssa_def *const tex_pos = - nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, - (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); const struct glsl_type *sampler_type = glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, @@ -463,16 +454,18 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) sampler->data.descriptor_set = 0; sampler->data.binding = 0; - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); tex->sampler_dim = tex_dim; - tex->op = nir_texop_tex; + tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->src[1].src_type = nir_tex_src_lod; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); tex->dest_type = nir_type_float; /* TODO */ tex->is_array = glsl_sampler_type_is_array(sampler_type); tex->coord_components = tex_pos->num_components; tex->texture = nir_deref_var_create(tex, sampler); - tex->sampler = nir_deref_var_create(tex, sampler); + tex->sampler = NULL; nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); nir_builder_instr_insert(&b, &tex->instr); -- cgit v1.2.3 From 92fb65f1176334159c9b2327bbb693332b0b6bb6 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 10 Mar 2016 18:25:10 -0800 Subject: anv/blit2d: Remove sampler from pipeline Since we're using texelFetch with a sampled image, a sampler is no longer needed. This agrees with the Vulkan Spec section 13.2.4 Descriptor Set Updates: sampler is a sampler handle, and is used in descriptor updates for types VK_DESCRIPTOR_TYPE_SAMPLER and VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER if the binding being updated does not use immutable samplers. 
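The key contract change in the switch from nir_texop_tex to nir_texop_txf above is the coordinate space: texelFetch consumes unnormalized integer texel indices plus an explicit LOD, whereas an ordinary sampled texture() lookup takes normalized [0, 1] coordinates. A one-axis sketch of the correspondence, assuming the usual texel-center convention:

/* With nearest filtering, texelFetch(tex, x, 0) reads the same texel that
 * texture(tex, u) selects when u lies at the center of texel x. */
static inline float
texel_center_to_normalized(int x, int width)
{
   return ((float)x + 0.5f) / (float)width;
}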
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 839ab02c904..4fab5d1b3dd 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -132,14 +132,6 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); - VkSampler sampler; - ANV_CALL(CreateSampler)(anv_device_to_handle(device), - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_FILTER_NEAREST, - .minFilter = VK_FILTER_NEAREST, - }, &cmd_buffer->pool->alloc, &sampler); - VkDescriptorPool desc_pool; anv_CreateDescriptorPool(anv_device_to_handle(device), &(const VkDescriptorPoolCreateInfo) { @@ -150,7 +142,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .poolSizeCount = 1, .pPoolSizes = (VkDescriptorPoolSize[]) { { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .descriptorCount = 1 }, } @@ -174,10 +166,10 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pImageInfo = (VkDescriptorImageInfo[]) { { - .sampler = sampler, + .sampler = NULL, .imageView = anv_image_view_to_handle(src_iview), .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }, @@ -242,8 +234,6 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, */ anv_DestroyDescriptorPool(anv_device_to_handle(device), desc_pool, &cmd_buffer->pool->alloc); - anv_DestroySampler(anv_device_to_handle(device), sampler, - &cmd_buffer->pool->alloc); anv_DestroyFramebuffer(anv_device_to_handle(device), fb, &cmd_buffer->pool->alloc); } @@ -593,7 +583,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pBindings = (VkDescriptorSetLayoutBinding[]) { { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = NULL -- cgit v1.2.3 From 5647de8ba56d8b4f2247eecc2cfcb3ec596dafe1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 14 Mar 2016 08:15:16 -0700 Subject: anv/blit2d: Only use one extent in meta_emit_blit2d Since scaling isn't involved, we don't need multiple extents. 
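Concretely, with a 1:1 copy the single extent pins down both the destination rectangle and the source texel window, so the three RECTLIST vertices reduce to offset-plus-extent corners. A sketch of the computation the vb_data initialization performs, with a hypothetical rect_vertex type and the integer texel coordinates introduced above:

struct rect_vertex {
   float pos[2];        /* destination position, in pixels */
   float tex_coord[3];  /* source texel coordinate */
};

static void
fill_blit2d_vertices(struct rect_vertex v[3], VkOffset3D src,
                     VkOffset3D dst, VkExtent3D extent)
{
   /* (max, max) corner */
   v[0] = (struct rect_vertex) {
      .pos       = { dst.x + extent.width, dst.y + extent.height },
      .tex_coord = { src.x + extent.width, src.y + extent.height, src.z },
   };
   /* (min, max) corner */
   v[1] = (struct rect_vertex) {
      .pos       = { dst.x, dst.y + extent.height },
      .tex_coord = { src.x, src.y + extent.height, src.z },
   };
   /* (min, min) corner */
   v[2] = (struct rect_vertex) {
      .pos       = { dst.x, dst.y },
      .tex_coord = { src.x, src.y, src.z },
   };
}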
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 4fab5d1b3dd..cb7e64b01df 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -58,10 +58,9 @@ static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, VkOffset3D src_offset, - VkExtent3D src_extent, struct anv_image_view *dest_iview, VkOffset3D dest_offset, - VkExtent3D dest_extent) + VkExtent3D extent) { struct anv_device *device = cmd_buffer->device; @@ -79,12 +78,12 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, vb_data[0] = (struct blit_vb_data) { .pos = { - dest_offset.x + dest_extent.width, - dest_offset.y + dest_extent.height, + dest_offset.x + extent.width, + dest_offset.y + extent.height, }, .tex_coord = { - src_offset.x + src_extent.width, - src_offset.y + src_extent.height, + src_offset.x + extent.width, + src_offset.y + extent.height, src_offset.z, }, }; @@ -92,11 +91,11 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, vb_data[1] = (struct blit_vb_data) { .pos = { dest_offset.x, - dest_offset.y + dest_extent.height, + dest_offset.y + extent.height, }, .tex_coord = { src_offset.x, - src_offset.y + src_extent.height, + src_offset.y + extent.height, src_offset.z, }, }; @@ -197,7 +196,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .framebuffer = fb, .renderArea = { .offset = { dest_offset.x, dest_offset.y }, - .extent = { dest_extent.width, dest_extent.height }, + .extent = { extent.width, extent.height }, }, .clearValueCount = 0, .pClearValues = NULL, @@ -381,7 +380,6 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, meta_emit_blit2d(cmd_buffer, &src_iview, src_offset_el, - (VkExtent3D){rects[r].width, rects[r].height, 1}, &dst_iview, dst_offset_el, (VkExtent3D){rects[r].width, rects[r].height, 1}); -- cgit v1.2.3 From f33866ae0a4279af03997fa3fa7a47e2eb7ec8fe Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 14 Mar 2016 11:11:50 -0700 Subject: anv/blit: Remove completed finishme for VkFilter This task was finished as of: d9079648d0f1c380929dea0f3a447ddfdf5dcd27. 
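For reference, the cited commit satisfied the old finishme by routing the caller's VkFilter straight into the sampler used by the blit shader, the same pattern visible in the pre-blit2d code earlier in this series:

VkSamplerCreateInfo sampler_info = {
   .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
   .magFilter = blit_filter,   /* VK_FILTER_NEAREST or VK_FILTER_LINEAR */
   .minFilter = blit_filter,
};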
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 09e2dbb2918..b83fa684ffd 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -366,8 +366,6 @@ void anv_CmdBlitImage( assert(src_image->samples == 1); assert(dest_image->samples == 1); - anv_finishme("respect VkFilter"); - meta_prepare_blit(cmd_buffer, &saved_state); for (unsigned r = 0; r < regionCount; r++) { -- cgit v1.2.3 From 5464f0c046ac68630355932639f18a02dc4b2d92 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 10 Mar 2016 11:06:25 -0800 Subject: anv/blit: Reduce number of VUE headers being read Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit.c | 2 +- src/intel/vulkan/anv_meta_blit2d.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index b83fa684ffd..e23b6978819 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -536,7 +536,7 @@ anv_device_init_meta_blit_state(struct anv_device *device) { .binding = 0, .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE }, { .binding = 1, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index cb7e64b01df..4a0bed1a335 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -541,7 +541,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) { .binding = 0, .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE }, { .binding = 1, -- cgit v1.2.3 From b1c5d45872c4c156b1366071c8532a0057a70745 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Mar 2016 11:50:53 -0700 Subject: anv/allocator: Add a size field to bo_pool_alloc --- src/intel/vulkan/anv_allocator.c | 4 +++- src/intel/vulkan/anv_batch_chain.c | 6 ++++-- src/intel/vulkan/anv_device.c | 4 ++-- src/intel/vulkan/anv_private.h | 3 ++- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 4fc83386a71..1928b9bb157 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -820,10 +820,12 @@ anv_bo_pool_finish(struct anv_bo_pool *pool) } VkResult -anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) { VkResult result; + assert(pool->bo_size <= size); + void *next_free_void; if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { struct bo_pool_bo_link *next_free = next_free_void; diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index d24dd06d7eb..eab050f066b 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -251,7 +251,8 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, + ANV_CMD_BUFFER_BATCH_SIZE); if (result != VK_SUCCESS) goto fail_alloc; @@ -283,7 +284,8 @@ anv_batch_bo_clone(struct anv_cmd_buffer 
*cmd_buffer, if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, + other_bbo->bo.size); if (result != VK_SUCCESS) goto fail_alloc; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index bcd7a9e3c0d..77fd72c42e1 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -717,7 +717,7 @@ anv_device_submit_simple_batch(struct anv_device *device, /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); assert(size < device->batch_bo_pool.bo_size); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, 4096); if (result != VK_SUCCESS) return result; @@ -1390,7 +1390,7 @@ VkResult anv_CreateFence( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); if (result != VK_SUCCESS) return result; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 939cd087377..46e377c0490 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -476,7 +476,8 @@ struct anv_bo_pool { void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, uint32_t block_size); void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, + uint32_t size); void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); -- cgit v1.2.3 From ecfb07427632b99a9f424ad06f0967fa8a7fe2a2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Mar 2016 13:06:08 -0700 Subject: anv/allocator: Make the bo_pool dynamically sized --- src/intel/vulkan/anv_allocator.c | 48 +++++++++++++++++++++++----------------- src/intel/vulkan/anv_device.c | 5 ++--- src/intel/vulkan/anv_private.h | 7 ++---- 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 1928b9bb157..e3b07ffa8bb 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -794,12 +794,10 @@ struct bo_pool_bo_link { }; void -anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t bo_size) +anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device) { pool->device = device; - pool->bo_size = bo_size; - pool->free_list = NULL; + memset(pool->free_list, 0, sizeof(pool->free_list)); VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); } @@ -807,13 +805,15 @@ anv_bo_pool_init(struct anv_bo_pool *pool, void anv_bo_pool_finish(struct anv_bo_pool *pool) { - struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); - while (link != NULL) { - struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - - anv_gem_munmap(link_copy.bo.map, pool->bo_size); - anv_gem_close(pool->device, link_copy.bo.gem_handle); - link = link_copy.next; + for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) { + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list[i]); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, link_copy.bo.size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link 
= link_copy.next; + } } VG(VALGRIND_DESTROY_MEMPOOL(pool)); @@ -824,29 +824,32 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) { VkResult result; - assert(pool->bo_size <= size); + const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size); + const unsigned pow2_size = 1 << size_log2; + const unsigned bucket = size_log2 - 12; + assert(bucket < ARRAY_SIZE(pool->free_list)); void *next_free_void; - if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) { struct bo_pool_bo_link *next_free = next_free_void; *bo = VG_NOACCESS_READ(&next_free->bo); assert(bo->map == next_free); - assert(bo->size == pool->bo_size); + assert(size <= bo->size); - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); return VK_SUCCESS; } struct anv_bo new_bo; - result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + result = anv_bo_init_new(&new_bo, pool->device, pow2_size); if (result != VK_SUCCESS) return result; - assert(new_bo.size == pool->bo_size); + assert(new_bo.size == pow2_size); - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0); if (new_bo.map == NULL) { anv_gem_close(pool->device, new_bo.gem_handle); return vk_error(VK_ERROR_MEMORY_MAP_FAILED); @@ -854,7 +857,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) *bo = new_bo; - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); return VK_SUCCESS; } @@ -867,6 +870,11 @@ anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in) struct bo_pool_bo_link *link = bo.map; link->bo = bo; + assert(util_is_power_of_two(bo.size)); + const unsigned size_log2 = ilog2_round_up(bo.size); + const unsigned bucket = size_log2 - 12; + assert(bucket < ARRAY_SIZE(pool->free_list)); + VG(VALGRIND_MEMPOOL_FREE(pool, bo.map)); - anv_ptr_free_list_push(&pool->free_list, link); + anv_ptr_free_list_push(&pool->free_list[bucket], link); } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 77fd72c42e1..068626d1c1f 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -716,8 +716,7 @@ anv_device_submit_simple_batch(struct anv_device *device, /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); - assert(size < device->batch_bo_pool.bo_size); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, 4096); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size); if (result != VK_SUCCESS) return result; @@ -829,7 +828,7 @@ VkResult anv_CreateDevice( pthread_mutex_init(&device->mutex, NULL); - anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + anv_bo_pool_init(&device->batch_bo_pool, device); anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 46e377c0490..6d98e0267a4 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -468,13 +468,10 @@ struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, struct anv_bo_pool { struct anv_device *device; - uint32_t bo_size; - - void *free_list; + void *free_list[16]; }; -void anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t block_size); 
+void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device); void anv_bo_pool_finish(struct anv_bo_pool *pool); VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size); -- cgit v1.2.3 From 869e393eb36e6912ca16fcfd060892c0de07bb49 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 Mar 2016 16:32:46 -0700 Subject: anv/batch_chain: Fall back to growing batches when chaining isn't available --- src/intel/vulkan/anv_batch_chain.c | 63 ++++++++++++++++++++++++++++++++++++-- src/intel/vulkan/anv_device.c | 7 +++++ src/intel/vulkan/anv_private.h | 2 ++ 3 files changed, 70 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index eab050f066b..034f3fda24a 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -340,6 +340,37 @@ anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); } +static VkResult +anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo, + struct anv_batch *batch, size_t additional, + size_t batch_padding) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + + size_t new_size = bbo->bo.size; + while (new_size <= bbo->length + additional + batch_padding) + new_size *= 2; + + if (new_size == bbo->bo.size) + return VK_SUCCESS; + + struct anv_bo new_bo; + VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, + &new_bo, new_size); + if (result != VK_SUCCESS) + return result; + + memcpy(new_bo.map, bbo->bo.map, bbo->length); + + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + + bbo->bo = new_bo; + anv_batch_bo_continue(bbo, batch, batch_padding); + + return VK_SUCCESS; +} + static void anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_cmd_buffer *cmd_buffer) @@ -478,6 +509,18 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) return VK_SUCCESS; } +static VkResult +anv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + + anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + struct anv_state anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, uint32_t entries, uint32_t *state_offset) @@ -548,9 +591,14 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; cmd_buffer->batch.user_data = cmd_buffer; + if (cmd_buffer->device->can_chain_batches) { + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + } else { + cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch; + } + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START_length * 4); @@ -680,7 +728,9 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * determine this statically here so that this stays in sync with the * actual ExecuteCommands implementation.
*/ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + if (!cmd_buffer->device->can_chain_batches) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT; + } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { /* If the secondary has exactly one batch buffer in its list *and* * that batch buffer is less than half of the maximum size, we're @@ -728,6 +778,15 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, anv_batch_emit_batch(&primary->batch, &secondary->batch); anv_cmd_buffer_emit_state_base_address(primary); break; + case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary); + unsigned length = secondary->batch.end - secondary->batch.start; + anv_batch_bo_grow(primary, bbo, &primary->batch, length, + GEN8_MI_BATCH_BUFFER_START_length * 4); + anv_batch_emit_batch(&primary->batch, &secondary->batch); + anv_cmd_buffer_emit_state_base_address(primary); + break; + } case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { struct anv_batch_bo *first_bbo = list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 068626d1c1f..ce2045ecf7b 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -826,6 +826,13 @@ VkResult anv_CreateDevice( device->info = *physical_device->info; device->isl_dev = physical_device->isl_dev; + /* On Broadwell and later, we can use batch chaining to more efficiently + * implement growing command buffers. Prior to Haswell, the kernel + * command parser gets in the way and we have to fall back to growing + * the batch. + */ + device->can_chain_batches = device->info.gen >= 8; + pthread_mutex_init(&device->mutex, NULL); anv_bo_pool_init(&device->batch_bo_pool, device); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6d98e0267a4..03e87670165 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -672,6 +672,7 @@ struct anv_device { struct isl_device isl_dev; int context_id; int fd; + bool can_chain_batches; struct anv_bo_pool batch_bo_pool; @@ -1192,6 +1193,7 @@ struct anv_cmd_pool { enum anv_cmd_buffer_exec_mode { ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT, ANV_CMD_BUFFER_EXEC_MODE_CHAIN, ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, }; -- cgit v1.2.3 From 4844723405d901afee3ab6a4a6c642ae8ef8bcb4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Mar 2016 16:11:53 -0700 Subject: anv: Don't assert-fail if someone asks for a non-existent entrypoint --- src/intel/vulkan/anv_entrypoints_gen.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py index 1e4cfcb1755..cedecfeac70 100644 --- a/src/intel/vulkan/anv_entrypoints_gen.py +++ b/src/intel/vulkan/anv_entrypoints_gen.py @@ -210,7 +210,6 @@ anv_resolve_entrypoint(uint32_t index) return validate_layer.entrypoints[index]; if (dispatch_devinfo == NULL) { - assert(anv_layer.entrypoints[index]); return anv_layer.entrypoints[index]; } -- cgit v1.2.3 From 204d937ac2623b230260f60d2d7c5d7233d697fb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Mar 2016 16:17:09 -0700 Subject: anv/device: Ignore the patch portion of the requested API version Fixes dEQP-VK.api.device_init.create_instance_name_version Bugzilla: 
https://bugs.freedesktop.org/show_bug.cgi?id=94661 --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index ce2045ecf7b..622e6422c5a 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -223,7 +223,7 @@ VkResult anv_CreateInstance( } if (VK_MAKE_VERSION(1, 0, 0) > client_version || - client_version > VK_MAKE_VERSION(1, 0, 3)) { + client_version > VK_MAKE_VERSION(1, 0, 0xfff)) { return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, "Client requested version %d.%d.%d", VK_VERSION_MAJOR(client_version), -- cgit v1.2.3 From 20417b2cb05ff0f710eb6b6fbd9299ba915f8fc1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Mar 2016 16:21:21 -0700 Subject: anv/device: Advertise version 1.0.5 Nothing substantial has changed since 1.0.2 --- src/intel/vulkan/anv_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 622e6422c5a..54a1f1274ab 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -532,7 +532,7 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 2), + .apiVersion = VK_MAKE_VERSION(1, 0, 5), .driverVersion = 1, .vendorID = 0x8086, .deviceID = pdevice->chipset_id, -- cgit v1.2.3 From a5dc3c0f02aa523d1d3d123b62b9a187821079fe Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 22 Mar 2016 10:53:37 -0700 Subject: anv: Sanitize Image extents and offsets Prepare Image extents and offsets for internal consumption by assigning the default values implicitly defined by the spec. Fixes textures on several Vulkan demos in which the VkImageCopy depth is set to zero when copying a 2D image.
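Returning to the two version patches above: accepting any patch level falls out of how Vulkan packs versions into a single 32-bit value (major in bits 31:22, minor in 21:12, patch in 11:0). A minimal sketch of the comparison; MY_MAKE_VERSION is a local stand-in for VK_MAKE_VERSION:

#include <stdbool.h>
#include <stdint.h>

#define MY_MAKE_VERSION(major, minor, patch) \
   ((((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | (patch))

static bool
client_version_ok(uint32_t client_version)
{
   /* 0xfff is the largest 12-bit patch number, so this accepts any
    * 1.0.x the client asks for while still rejecting 0.x and 1.1+.
    */
   return MY_MAKE_VERSION(1, 0, 0) <= client_version &&
          client_version <= MY_MAKE_VERSION(1, 0, 0xfff);
}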
v2 (Jason Ekstrand): Replace "prep" with "sanitize" Make function static inline Pass structs instead of pointers Reviewed-by: Jason Ekstrand Signed-off-by: Nanley Chery --- src/intel/vulkan/anv_image.c | 24 ++++------------- src/intel/vulkan/anv_meta_copy.c | 53 ++++++++++++++++++++++++++----------- src/intel/vulkan/anv_meta_resolve.c | 41 +++++++++++++++++++++------- src/intel/vulkan/anv_private.h | 33 +++++++++++++++++++++++ 4 files changed, 106 insertions(+), 45 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 143a08413f7..b47425bd0e1 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -124,30 +124,16 @@ make_surface(const struct anv_device *dev, struct anv_surface *anv_surf = get_surface(image, aspect); - VkExtent3D extent; - switch (vk_info->imageType) { - case VK_IMAGE_TYPE_1D: - extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; - break; - case VK_IMAGE_TYPE_2D: - extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; - break; - case VK_IMAGE_TYPE_3D: - extent = vk_info->extent; - break; - default: - unreachable("invalid image type"); - } - - image->extent = extent; + image->extent = anv_sanitize_image_extent(vk_info->imageType, + vk_info->extent); ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[vk_info->imageType], .format = anv_get_isl_format(vk_info->format, aspect, vk_info->tiling, NULL), - .width = extent.width, - .height = extent.height, - .depth = extent.depth, + .width = image->extent.width, + .height = image->extent.height, + .depth = image->extent.depth, .levels = vk_info->mipLevels, .array_len = vk_info->arrayLayers, .samples = vk_info->samples, diff --git a/src/intel/vulkan/anv_meta_copy.c b/src/intel/vulkan/anv_meta_copy.c index 1a2bfd6cf01..982fa7e10c1 100644 --- a/src/intel/vulkan/anv_meta_copy.c +++ b/src/intel/vulkan/anv_meta_copy.c @@ -28,16 +28,16 @@ * if Image is uncompressed or compressed, respectively. */ static struct VkExtent3D -meta_region_extent_el(const VkFormat format, +meta_region_extent_el(const struct anv_image *image, const struct VkExtent3D *extent) { const struct isl_format_layout *isl_layout = - anv_format_for_vk_format(format)->isl_layout; + anv_format_for_vk_format(image->vk_format)->isl_layout; + return anv_sanitize_image_extent(image->type, (VkExtent3D) { .width = DIV_ROUND_UP(extent->width , isl_layout->bw), .height = DIV_ROUND_UP(extent->height, isl_layout->bh), .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; + }); } /* Returns the user-provided VkBufferImageCopy::imageOffset in units of @@ -49,11 +49,11 @@ meta_region_offset_el(const struct anv_image *image, const struct VkOffset3D *offset) { const struct isl_format_layout *isl_layout = image->format->isl_layout; - return (VkOffset3D) { + return anv_sanitize_image_offset(image->type, (VkOffset3D) { .x = offset->x / isl_layout->bw, .y = offset->y / isl_layout->bh, .z = offset->z / isl_layout->bd, - }; + }); } static struct anv_meta_blit2d_surf @@ -115,17 +115,28 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, for (unsigned r = 0; r < regionCount; r++) { /** + * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images + * imageExtent is the size in texels of the image to copy in width, height + * and depth. 1D images use only x and width. 2D images use x, y, width + * and height. 3D images use x, y, z, width, height and depth.
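The texel-to-block conversion these helpers perform can be sanity-checked with a small worked example. A hedged sketch with local stand-ins for the isl block-layout fields (bw/bh/bd are texels per block in each dimension):

#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

struct block_layout { uint32_t bw, bh, bd; };

static void
bc1_example(void)
{
   /* BC1 compresses 4x4x1 texel blocks, so a 10x6x1 texel region
    * spans 3x2x1 blocks: partial blocks round up.
    */
   const struct block_layout bc1 = { 4, 4, 1 };
   uint32_t w = DIV_ROUND_UP(10, bc1.bw);   /* 3 */
   uint32_t h = DIV_ROUND_UP(6, bc1.bh);    /* 2 */
   uint32_t d = DIV_ROUND_UP(1, bc1.bd);    /* 1 */
   (void)w; (void)h; (void)d;
}

Offsets, by contrast, divide exactly rather than rounding, because the spec requires copy offsets into compressed images to be block-aligned.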
+ * + * + * Also, convert the offsets and extent from units of texels to units of + * blocks - which is the highest resolution accessible in this command. + */ const VkOffset3D img_offset_el = meta_region_offset_el(image, &pRegions[r].imageOffset); const VkExtent3D bufferExtent = { .width = pRegions[r].bufferRowLength, .height = pRegions[r].bufferImageHeight, }; + + /* Start creating blit rect */ const VkExtent3D buf_extent_el = - meta_region_extent_el(image->vk_format, &bufferExtent); + meta_region_extent_el(image, &bufferExtent); const VkExtent3D img_extent_el = - meta_region_extent_el(image->vk_format, &pRegions[r].imageExtent); + meta_region_extent_el(image, &pRegions[r].imageExtent); struct anv_meta_blit2d_rect rect = { .width = MAX2(buf_extent_el.width, img_extent_el.width), .height = MAX2(buf_extent_el.height, img_extent_el.height), @@ -152,7 +163,7 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y; /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; unsigned slice_3d = 0; unsigned slice_array = 0; @@ -163,7 +174,7 @@ meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, pRegions[r].imageSubresource.mipLevel, pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, + img_offset_el.z + slice_3d, x_offset, y_offset); *x_offset += img_offset_el.x; @@ -259,20 +270,30 @@ void anv_CmdCopyImage( struct anv_meta_blit2d_surf b_dst = blit_surf_for_image(dest_image, dst_isl_surf); - /* Start creating blit rect */ + /** + * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images + * extent is the size in texels of the source image to copy in width, + * height and depth. 1D images use only x and width. 2D images use x, y, + * width and height. 3D images use x, y, z, width, height and depth. + * + * Also, convert the offsets and extent from units of texels to units of + * blocks - which is the highest resolution accessible in this command.
+ */ const VkOffset3D dst_offset_el = meta_region_offset_el(dest_image, &pRegions[r].dstOffset); const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); const VkExtent3D img_extent_el = - meta_region_extent_el(src_image->vk_format, &pRegions[r].extent); + meta_region_extent_el(src_image, &pRegions[r].extent); + + /* Start creating blit rect */ struct anv_meta_blit2d_rect rect = { .width = img_extent_el.width, .height = img_extent_el.height, }; /* Loop through each 3D or array slice */ - unsigned num_slices_3d = pRegions[r].extent.depth; + unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; unsigned slice_3d = 0; unsigned slice_array = 0; @@ -283,14 +304,14 @@ void anv_CmdCopyImage( pRegions[r].dstSubresource.mipLevel, pRegions[r].dstSubresource.baseArrayLayer + slice_array, - pRegions[r].dstOffset.z + slice_3d, + dst_offset_el.z + slice_3d, &rect.dst_x, &rect.dst_y); isl_surf_get_image_offset_el(src_isl_surf, pRegions[r].srcSubresource.mipLevel, pRegions[r].srcSubresource.baseArrayLayer + slice_array, - pRegions[r].srcOffset.z + slice_3d, + src_offset_el.z + slice_3d, &rect.src_x, &rect.src_y); rect.dst_x += dst_offset_el.x; diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 19fb3ad3003..f50af52ece5 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -719,6 +719,27 @@ void anv_CmdResolveImage( anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, ®ion->dstOffset); + /** + * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images + * + * extent is the size in texels of the source image to resolve in width, + * height and depth. 1D images use only x and width. 2D images use x, y, + * width and height. 3D images use x, y, z, width, height and depth. + * + * srcOffset and dstOffset select the initial x, y, and z offsets in + * texels of the sub-regions of the source and destination image data. + * extent is the size in texels of the source image to resolve in width, + * height and depth. 1D images use only x and width. 2D images use x, y, + * width and height. 3D images use x, y, z, width, height and depth. 
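A short usage sketch of the two helpers this patch adds to anv_private.h (assuming they are in scope); the depth-zero case is exactly the broken-demo input the commit message describes:

#include <assert.h>

static void
sanitize_example(void)
{
   /* A 2D copy region recorded with depth = 0 by a buggy client... */
   VkExtent3D in = { .width = 256, .height = 128, .depth = 0 };

   /* ...comes back with the spec-implied depth of 1. */
   VkExtent3D out = anv_sanitize_image_extent(VK_IMAGE_TYPE_2D, in);
   assert(out.width == 256 && out.height == 128 && out.depth == 1);

   /* 1D images use only x, so y and z are forced to 0. */
   VkOffset3D off =
      anv_sanitize_image_offset(VK_IMAGE_TYPE_1D,
                                (VkOffset3D) { .x = 17, .y = 3, .z = 9 });
   assert(off.x == 17 && off.y == 0 && off.z == 0);
}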
+ */ + const struct VkExtent3D extent = + anv_sanitize_image_extent(src_image->type, region->extent); + const struct VkOffset3D srcOffset = + anv_sanitize_image_offset(src_image->type, region->srcOffset); + const struct VkOffset3D dstOffset = + anv_sanitize_image_offset(dest_image->type, region->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) { @@ -780,12 +801,12 @@ void anv_CmdResolveImage( .framebuffer = fb_h, .renderArea = { .offset = { - region->dstOffset.x, - region->dstOffset.y, + dstOffset.x, + dstOffset.y, }, .extent = { - region->extent.width, - region->extent.height, + extent.width, + extent.height, } }, .clearValueCount = 0, @@ -796,17 +817,17 @@ void anv_CmdResolveImage( emit_resolve(cmd_buffer, &src_iview, &(VkOffset2D) { - .x = region->srcOffset.x, - .y = region->srcOffset.y, + .x = srcOffset.x, + .y = srcOffset.y, }, &dest_iview, &(VkOffset2D) { - .x = region->dstOffset.x, - .y = region->dstOffset.y, + .x = dstOffset.x, + .y = dstOffset.y, }, &(VkExtent2D) { - .width = region->extent.width, - .height = region->extent.height, + .width = extent.width, + .height = extent.height, }); ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 03e87670165..94a13d7d331 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1670,6 +1670,39 @@ struct anv_buffer_view { const struct anv_format * anv_format_for_descriptor_type(VkDescriptorType type); +static inline struct VkExtent3D +anv_sanitize_image_extent(const VkImageType imageType, + const struct VkExtent3D imageExtent) +{ + switch (imageType) { + case VK_IMAGE_TYPE_1D: + return (VkExtent3D) { imageExtent.width, 1, 1 }; + case VK_IMAGE_TYPE_2D: + return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 }; + case VK_IMAGE_TYPE_3D: + return imageExtent; + default: + unreachable("invalid image type"); + } +} + +static inline struct VkOffset3D +anv_sanitize_image_offset(const VkImageType imageType, + const struct VkOffset3D imageOffset) +{ + switch (imageType) { + case VK_IMAGE_TYPE_1D: + return (VkOffset3D) { imageOffset.x, 0, 0 }; + case VK_IMAGE_TYPE_2D: + return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 }; + case VK_IMAGE_TYPE_3D: + return imageOffset; + default: + unreachable("invalid image type"); + } +} + + void anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, enum isl_format format, -- cgit v1.2.3 From d353ba8f5fee23e9d9c8165b6cbfaba33e19ace6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Wed, 23 Mar 2016 23:24:25 -0700 Subject: anv: Add genxml register support Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_private.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 94a13d7d331..77f453afb36 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -813,6 +813,15 @@ __gen_combine_address(struct anv_batch *batch, void *location, #define __anv_cmd_length_bias(cmd) cmd ## _length_bias #define __anv_cmd_header(cmd) cmd ## _header #define __anv_cmd_pack(cmd) cmd ## _pack +#define __anv_reg_num(reg) reg ## _num + +#define anv_pack_struct(dst, struc, ...) do { \ + struct struc __template = { \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(struc)(NULL, dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \ + } while (0) #define anv_batch_emit(batch, cmd, ...) 
do { \ void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ -- cgit v1.2.3 From 8f3c23667433aacf5ad65a699c7ce082f3d6e416 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 24 Mar 2016 13:05:04 -0700 Subject: anv: Use genxml register support for L3 Cache config The programming of the L3 Cache registers should match the previous manually packed LRI values. Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 53 +++++++++++++++++++++++--------------- src/intel/vulkan/gen8_cmd_buffer.c | 33 ++++++++++++------------ 2 files changed, 48 insertions(+), 38 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index dbf05d06d0f..04c1d3b3477 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -294,17 +294,10 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -#define GEN7_L3SQCREG1 0xb010 -#define GEN7_L3CNTLREG2 0xb020 -#define GEN7_L3CNTLREG3 0xb024 +#define emit_lri(batch, reg, imm) \ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), \ + .RegisterOffset = __anv_reg_num(reg), \ + .DataDWord = imm) void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) @@ -315,12 +308,19 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) * - src/mesa/drivers/dri/i965/gen7_l3_state.c */ - uint32_t l3c2_val = enable_slm ? - /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */ - /*0x02040021*/0x010000a1 : - /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */ - /*0x04080040*/0x02000030; - bool changed = cmd_buffer->state.current_l3_config != l3c2_val; + uint32_t l3cr2_slm, l3cr2_noslm; + anv_pack_struct(&l3cr2_noslm, GENX(L3CNTLREG2), + .URBAllocation = 24, + .ROAllocation = 0, + .DCAllocation = 16); + anv_pack_struct(&l3cr2_slm, GENX(L3CNTLREG2), + .SLMEnable = 1, + .URBAllocation = 16, + .URBLowBandwidth = 1, + .ROAllocation = 0, + .DCAllocation = 8); + const uint32_t l3cr2_val = enable_slm ? l3cr2_slm : l3cr2_noslm; + bool changed = cmd_buffer->state.current_l3_config != l3cr2_val; if (changed) { /* According to the hardware docs, the L3 partitioning can only be changed @@ -346,10 +346,21 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .CommandStreamerStallEnable = true); anv_finishme("write GEN7_L3SQCREG1"); - emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val); - emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, - enable_slm ? 0x00040810 : 0x00040410); - cmd_buffer->state.current_l3_config = l3c2_val; + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2_val); + + uint32_t l3cr3_slm, l3cr3_noslm; + anv_pack_struct(&l3cr3_noslm, GENX(L3CNTLREG3), + .ISAllocation = 8, + .CAllocation = 4, + .TAllocation = 8); + anv_pack_struct(&l3cr3_slm, GENX(L3CNTLREG3), + .ISAllocation = 8, + .CAllocation = 8, + .TAllocation = 8); + const uint32_t l3cr3_val = enable_slm ? 
l3cr3_slm : l3cr3_noslm; + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3_val); + + cmd_buffer->state.current_l3_config = l3cr2_val; } } diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 87b5e340772..3fb5c276107 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -108,15 +108,10 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } #endif -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -#define GEN8_L3CNTLREG 0x7034 +#define emit_lri(batch, reg, imm) \ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), \ + .RegisterOffset = __anv_reg_num(reg), \ + .DataDWord = imm) void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) @@ -127,12 +122,16 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) * - src/mesa/drivers/dri/i965/gen7_l3_state.c */ - uint32_t val = enable_slm ? - /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ - 0x60000021 : - /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ - 0x60000060; - bool changed = cmd_buffer->state.current_l3_config != val; + uint32_t l3cr_slm, l3cr_noslm; + anv_pack_struct(&l3cr_noslm, GENX(L3CNTLREG), + .URBAllocation = 48, + .AllAllocation = 48); + anv_pack_struct(&l3cr_slm, GENX(L3CNTLREG), + .SLMEnable = 1, + .URBAllocation = 16, + .AllAllocation = 48); + const uint32_t l3cr_val = enable_slm ? l3cr_slm : l3cr_noslm; + bool changed = cmd_buffer->state.current_l3_config != l3cr_val; if (changed) { /* According to the hardware docs, the L3 partitioning can only be changed @@ -157,8 +156,8 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .PostSyncOperation = NoWrite, .CommandStreamerStallEnable = true); - emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); - cmd_buffer->state.current_l3_config = val; + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr_val); + cmd_buffer->state.current_l3_config = l3cr_val; } } -- cgit v1.2.3 From 4eab37d6cda54a4ac600347f764ef223c3a7459f Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 21 Mar 2016 10:41:06 -0700 Subject: anv/image: Enable specifying a surface's minimum pitch This is required to create multiple, horizontally adjacent, max-width images from one blit2d surface. This is also required for more accurate width specification of surfaces within a larger surface (which is seen as the smaller surface's enclosing region). Note that anv_image_create_info::stride has been unused since commit, b36938964063a4072abfd779f5607743dbc3b6f1 . 
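To illustrate why a caller-controlled minimum pitch matters, a minimal sketch with made-up numbers (the surrounding isl_surf_init setup is elided): a narrow view carved out of a wider enclosing surface must keep the enclosing row stride, which only a minimum-pitch override can express.

#include <assert.h>
#include <stdint.h>

static void
pitch_example(void)
{
   /* A 64-texel-wide RGBA8 view inside a surface whose rows are
    * 1024 bytes apart.
    */
   uint32_t view_width_px = 64;
   uint32_t bytes_per_px  = 4;

   uint32_t natural_pitch = view_width_px * bytes_per_px;   /* 256B  */
   uint32_t min_pitch     = 1024;        /* the enclosing region's pitch */

   /* Without passing .min_pitch = anv_info->stride to isl_surf_init,
    * isl would lay rows out 256B apart and stepping down one row
    * would land on the wrong texels of the enclosing surface.
    */
   assert(min_pitch > natural_pitch);
}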
Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index b47425bd0e1..266fbe73ddc 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -138,7 +138,7 @@ make_surface(const struct anv_device *dev, .array_len = vk_info->arrayLayers, .samples = vk_info->samples, .min_alignment = 0, - .min_pitch = 0, + .min_pitch = anv_info->stride, .usage = choose_isl_surf_usage(image->usage, aspect), .tiling_flags = tiling_flags); -- cgit v1.2.3 From 0e82896a116ea0212dfcb13fb1456c93732d8564 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 18 Mar 2016 15:12:32 -0700 Subject: anv/blit2d: Add a function to create an ImageView This function differs from the open-coded implementation in that the ImageView's width is determined by the caller and is not unconditionally set to match the number of texels within the surface's pitch. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_meta_blit2d.c | 196 ++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 113 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 87c3358f045..734ba8ec1d6 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -54,6 +54,81 @@ vk_format_for_size(int bs) } } +static void +create_iview(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *surf, + struct anv_meta_blit2d_rect *rect, + VkImageUsageFlags usage, + VkImage *img, + struct anv_image_view *iview) +{ + struct isl_tile_info tile_info; + isl_tiling_get_info(&cmd_buffer->device->isl_dev, + surf->tiling, surf->bs, &tile_info); + const unsigned tile_width_px = tile_info.width > surf->bs ? + tile_info.width / surf->bs : 1; + uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? + &rect->src_y : &rect->dst_y; + uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? + &rect->src_x : &rect->dst_x; + + /* Define the shared state among all created image views */ + const VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = vk_format_for_size(surf->bs), + .extent = { + .width = rect->width + (*rect_x) % tile_width_px, + .height = rect->height + (*rect_y) % tile_info.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = surf->tiling == ISL_TILING_LINEAR ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL, + .usage = usage, + }; + + /* Create the VkImage that is bound to the surface's memory. */ + anv_image_create(anv_device_to_handle(cmd_buffer->device), + &(struct anv_image_create_info) { + .vk_info = &image_info, + .isl_tiling_flags = 1 << surf->tiling, + .stride = surf->pitch, + }, &cmd_buffer->pool->alloc, img); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(*img)->bo = surf->bo; + anv_image_from_handle(*img)->offset = surf->base_offset; + + /* Create a VkImageView that starts at the tile aligned offset closest + * to the provided x/y offset into the surface. 
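The extent padding computed at the top of create_iview can be checked with illustrative numbers (the tile geometry here is made up for the example, not a specific hardware tiling):

#include <stdint.h>

static void
padding_example(void)
{
   /* A 100x40 rect at (70, 21) on tiles 128 pixels wide and 8 rows
    * tall: the image must span from the tile-aligned start of the
    * rect through its far edge, so each dimension grows by the
    * offset's remainder within a tile.
    */
   uint32_t tile_w_px = 128, tile_h_rows = 8;
   uint32_t rect_x = 70, rect_y = 21, rect_w = 100, rect_h = 40;

   uint32_t img_w = rect_w + rect_x % tile_w_px;    /* 100 + 70 = 170 */
   uint32_t img_h = rect_h + rect_y % tile_h_rows;  /* 40 + 5 = 45 */
   (void)img_w; (void)img_h;
}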
+ */ + uint32_t img_o = 0; + isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, + &anv_image_from_handle(*img)-> + color_surface.isl, + *rect_x, *rect_y, + &img_o, rect_x, rect_y); + anv_image_view_init(iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = *img, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_info.format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, cmd_buffer, img_o, usage); +} + static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, @@ -260,132 +335,27 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_rect *rects) { VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkFormat src_format = vk_format_for_size(src->bs); - VkFormat dst_format = vk_format_for_size(dst->bs); VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { - - /* Create VkImages */ - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = 0, /* TEMPLATE */ - .extent = { - .width = 0, /* TEMPLATE */ - .height = 0, /* TEMPLATE */ - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = 0, /* TEMPLATE */ - .usage = 0, /* TEMPLATE */ - }; - struct anv_image_create_info anv_image_info = { - .vk_info = &image_info, - .isl_tiling_flags = 0, /* TEMPLATE */ - }; - - /* The image height is the rect height + src/dst y-offset from the - * tile-aligned base address. - */ - struct isl_tile_info tile_info; - - anv_image_info.isl_tiling_flags = 1 << src->tiling; - image_info.tiling = src->tiling == ISL_TILING_LINEAR ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = src_usage; - image_info.format = src_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs, - &tile_info); - image_info.extent.height = rects[r].height + - rects[r].src_y % tile_info.height; - image_info.extent.width = src->pitch / src->bs; - VkImage src_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &src_image); - - anv_image_info.isl_tiling_flags = 1 << dst->tiling; - image_info.tiling = dst->tiling == ISL_TILING_LINEAR ? - VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - image_info.usage = dst_usage; - image_info.format = dst_format, - isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs, - &tile_info); - image_info.extent.height = rects[r].height + - rects[r].dst_y % tile_info.height; - image_info.extent.width = dst->pitch / dst->bs; - VkImage dst_image; - anv_image_create(vk_device, &anv_image_info, - &cmd_buffer->pool->alloc, &dst_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(src_image)->bo = src->bo; - anv_image_from_handle(src_image)->offset = src->base_offset; - anv_image_from_handle(dst_image)->bo = dst->bo; - anv_image_from_handle(dst_image)->offset = dst->base_offset; - - /* Create VkImageViews */ - VkImageViewCreateInfo iview_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = 0, /* TEMPLATE */ - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = 0, /* TEMPLATE */ - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }; - uint32_t img_o = 0; - - iview_info.image = src_image; - iview_info.format = src_format; - VkOffset3D src_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(src_image)-> - color_surface.isl, - rects[r].src_x, - rects[r].src_y, - &img_o, - (uint32_t*)&src_offset_el.x, - (uint32_t*)&src_offset_el.y); - + VkImage src_img; + VkImage dst_img; struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, src_usage); - - iview_info.image = dst_image; - iview_info.format = dst_format; - VkOffset3D dst_offset_el = {0}; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(dst_image)-> - color_surface.isl, - rects[r].dst_x, - rects[r].dst_y, - &img_o, - (uint32_t*)&dst_offset_el.x, - (uint32_t*)&dst_offset_el.y); struct anv_image_view dst_iview; - anv_image_view_init(&dst_iview, cmd_buffer->device, - &iview_info, cmd_buffer, img_o, dst_usage); + create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); + create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); /* Perform blit */ meta_emit_blit2d(cmd_buffer, &src_iview, - src_offset_el, + (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, &dst_iview, - dst_offset_el, + (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, (VkExtent3D){rects[r].width, rects[r].height, 1}); - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); } } -- cgit v1.2.3 From 5879cb0251e7b4593eb4fd01684bd68f0945e3d1 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 12:08:49 -0700 Subject: anv: Fix cache pollution race during L3 partitioning set-up. Port 0aa4f99f562a05880a779707cbcd46be459863bf to anv. Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 36 ++++++++++++++++++++++++++---------- src/intel/vulkan/gen8_cmd_buffer.c | 36 ++++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 20 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 04c1d3b3477..06b3a75cbef 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -323,22 +323,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) bool changed = cmd_buffer->state.current_l3_config != l3cr2_val; if (changed) { - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... 
+ /* According to the hardware docs, the L3 partitioning can only be + * changed while the pipeline is completely drained and the caches are + * flushed, which involves a first PIPE_CONTROL flush which stalls the + * pipeline... */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true, - .ConstantCacheInvalidationEnable = true, - .InstructionCacheInvalidateEnable = true, .DCFlushEnable = true, .PostSyncOperation = NoWrite, .CommandStreamerStallEnable = true); - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. + /* ...followed by a second pipelined PIPE_CONTROL that initiates + * invalidation of the relevant caches. Note that because RO + * invalidation happens at the top of the pipeline (i.e. right away as + * the PIPE_CONTROL command is processed by the CS) we cannot combine it + * with the previous stalling flush as the hardware documentation + * suggests, because that would cause the CS to stall on previous + * rendering *after* RO invalidation and wouldn't prevent the RO caches + * from being polluted by concurrent rendering before the stall + * completes. This intentionally doesn't implement the SKL+ hardware + * workaround suggesting to enable CS stall on PIPE_CONTROLs with the + * texture cache invalidation bit set for GPGPU workloads because the + * previous and subsequent PIPE_CONTROLs already guarantee that there is + * no concurrent GPGPU kernel execution (see SKL HSD 2132585). + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .PostSyncOperation = NoWrite); + + /* Now send a third stalling flush to make sure that invalidation is + * complete when the L3 configuration registers are modified. */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DCFlushEnable = true, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 3fb5c276107..dab1d7411e7 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -134,22 +134,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) bool changed = cmd_buffer->state.current_l3_config != l3cr_val; if (changed) { - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... + /* According to the hardware docs, the L3 partitioning can only be + * changed while the pipeline is completely drained and the caches are + * flushed, which involves a first PIPE_CONTROL flush which stalls the + * pipeline... */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true, - .ConstantCacheInvalidationEnable = true, - .InstructionCacheInvalidateEnable = true, .DCFlushEnable = true, .PostSyncOperation = NoWrite, .CommandStreamerStallEnable = true); - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. + /* ...followed by a second pipelined PIPE_CONTROL that initiates + * invalidation of the relevant caches. Note that because RO + * invalidation happens at the top of the pipeline (i.e. 
right away as + * the PIPE_CONTROL command is processed by the CS) we cannot combine it + * with the previous stalling flush as the hardware documentation + * suggests, because that would cause the CS to stall on previous + * rendering *after* RO invalidation and wouldn't prevent the RO caches + * from being polluted by concurrent rendering before the stall + * completes. This intentionally doesn't implement the SKL+ hardware + * workaround suggesting to enable CS stall on PIPE_CONTROLs with the + * texture cache invalidation bit set for GPGPU workloads because the + * previous and subsequent PIPE_CONTROLs already guarantee that there is + * no concurrent GPGPU kernel execution (see SKL HSD 2132585). + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .PostSyncOperation = NoWrite); + + /* Now send a third stalling flush to make sure that invalidation is + * complete when the L3 configuration registers are modified. */ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), .DCFlushEnable = true, -- cgit v1.2.3 From f60683b32a45081979df089894d36d05f2d0ec0c Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 12:08:31 -0700 Subject: anv: Invalidate state cache before L3 partitioning set-up. Port 10d84ba9f084174a1e8e7639dfb05dd855ba86e8 to anv. Signed-off-by: Jordan Justen --- src/intel/vulkan/gen7_cmd_buffer.c | 1 + src/intel/vulkan/gen8_cmd_buffer.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 06b3a75cbef..b5d21efb203 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -351,6 +351,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .TextureCacheInvalidationEnable = true, .ConstantCacheInvalidationEnable = true, .InstructionCacheInvalidateEnable = true, + .StateCacheInvalidationEnable = true, .PostSyncOperation = NoWrite); /* Now send a third stalling flush to make sure that invalidation is diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index dab1d7411e7..5b6afb3d70d 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -162,6 +162,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) .TextureCacheInvalidationEnable = true, .ConstantCacheInvalidationEnable = true, .InstructionCacheInvalidateEnable = true, + .StateCacheInvalidationEnable = true, .PostSyncOperation = NoWrite); /* Now send a third stalling flush to make sure that invalidation is -- cgit v1.2.3 From 1a3adae84aa16247cba0e3619d54e6d8d543fcf1 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 14:45:24 -0700 Subject: anv/gen7: Save kernel command parser version Signed-off-by: Jordan Justen --- src/intel/vulkan/anv_device.c | 11 +++++++++++ src/intel/vulkan/anv_private.h | 1 + 2 files changed, 12 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 54a1f1274ab..01d3c996ad2 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -99,6 +99,17 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } + device->cmd_parser_version = -1; + if (device->info->gen == 7) { + device->cmd_parser_version = + anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION); + if 
(device->cmd_parser_version == -1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get command parser version"); + goto fail; + } + } + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, "failed to get aperture size: %m"); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 77f453afb36..48ebff456e3 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -546,6 +546,7 @@ struct anv_physical_device { uint64_t aperture_size; struct brw_compiler * compiler; struct isl_device isl_dev; + int cmd_parser_version; }; struct anv_wsi_interaface; -- cgit v1.2.3 From 8dbfa265a439904628c2d875885e80bc45a90a05 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 14:49:56 -0700 Subject: anv/gen7: DispatchIndirect requires cmd parser 5 Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1b53f85419b..35c46008a84 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -638,6 +638,24 @@ void genX(CmdDrawIndexedIndirect)( .PrimitiveTopologyType = pipeline->topology); } +#if GEN_GEN == 7 + +static bool +verify_cmd_parser(const struct anv_device *device, + int required_version, + const char *function) +{ + if (device->instance->physicalDevice.cmd_parser_version < required_version) { + vk_errorf(VK_ERROR_FEATURE_NOT_PRESENT, + "cmd parser version %d is required for %s", + required_version, function); + return false; + } else { + return true; + } +} + +#endif void genX(CmdDispatch)( VkCommandBuffer commandBuffer, @@ -699,6 +717,14 @@ void genX(CmdDispatchIndirect)( uint32_t bo_offset = buffer->offset + offset; struct anv_batch *batch = &cmd_buffer->batch; +#if GEN_GEN == 7 + /* Linux 4.4 added command parser version 5 which allows the GPGPU + * indirect dispatch registers to be written. + */ + if (verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect")) + return; +#endif + if (prog_data->uses_num_work_groups) { cmd_buffer->state.num_workgroups_offset = bo_offset; cmd_buffer->state.num_workgroups_bo = bo; -- cgit v1.2.3 From f56f538ce4753a6fdd969b610f35433fd657e4ee Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 28 Mar 2016 22:26:47 -0700 Subject: anv/gen7: Fix command parser version test with indirect dispatch Caught-by: Ilia Mirkin Signed-off-by: Jordan Justen --- src/intel/vulkan/genX_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 35c46008a84..d642832dd57 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -721,7 +721,7 @@ void genX(CmdDispatchIndirect)( /* Linux 4.4 added command parser version 5 which allows the GPGPU * indirect dispatch registers to be written. 
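In guard form, a condensed sketch of the check this adds (shown with the corrected sense that the follow-up patch below settles on; structures are the ones from this series):

static bool
indirect_dispatch_allowed(const struct anv_device *device)
{
   /* Command parser version 5, first shipped in Linux 4.4, is the
    * first that lets the GPGPU indirect dispatch registers be
    * written from a batch.
    */
   return device->instance->physicalDevice.cmd_parser_version >= 5;
}

/* In vkCmdDispatchIndirect on gen7:
 *    if (!indirect_dispatch_allowed(cmd_buffer->device))
 *       return;
 */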
*/ - if (verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect")) + if (!verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect")) return; #endif -- cgit v1.2.3 From dd6f7200466b06e13f7c9cd70710bc2c57433706 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:20:26 -0700 Subject: anv/blit2d: Remove the tex_dim parameter from copy_fragment_shader Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 734ba8ec1d6..f40dc2f7828 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -390,7 +390,7 @@ build_nir_vertex_shader(void) } static nir_shader * -build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +build_nir_copy_fragment_shader() { const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); @@ -405,7 +405,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); const struct glsl_type *sampler_type = - glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, glsl_get_base_type(vec4)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); @@ -413,7 +413,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) sampler->data.binding = 0; nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); - tex->sampler_dim = tex_dim; + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); @@ -501,7 +501,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }; struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + .nir = build_nir_copy_fragment_shader(), }; VkPipelineVertexInputStateCreateInfo vi_create_info = { -- cgit v1.2.3 From b38a0d64ba2274c3d48b731e86a7bbb04fca81c8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:25:47 -0700 Subject: anv/meta2d: Don't declare an array sampler in the fragment shader With the new blit framework we aren't using array textures and, from talking with Nanley, we don't think it's going to be useful in the future either. Just get rid of it for now. 
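The s_tex variable these blit2d shaders declare at set 0, binding 0 has to line up with the API-side descriptor set layout. A hedged sketch of the matching binding (not the driver's actual meta setup code):

VkDescriptorSetLayoutBinding tex_binding = {
   .binding            = 0,   /* sampler->data.binding in the NIR above */
   .descriptorType     = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
   .descriptorCount    = 1,
   .stageFlags         = VK_SHADER_STAGE_FRAGMENT_BIT,
   .pImmutableSamplers = NULL,
};

Because the shader fetches with txf (texelFetch), no VkSampler object is involved, which is why a plain SAMPLED_IMAGE descriptor suffices instead of a COMBINED_IMAGE_SAMPLER.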
Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index f40dc2f7828..10e9ba3befd 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -393,19 +393,19 @@ static nir_shader * build_nir_copy_fragment_shader() { const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec3, "v_tex_pos"); + vec2, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, glsl_get_base_type(vec4)); nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); @@ -420,7 +420,7 @@ build_nir_copy_fragment_shader() tex->src[1].src_type = nir_tex_src_lod; tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); tex->dest_type = nir_type_float; /* TODO */ - tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->is_array = false; tex->coord_components = tex_pos->num_components; tex->texture = nir_deref_var_create(tex, sampler); tex->sampler = NULL; -- cgit v1.2.3 From 9553fd2c97bda18b997845610be365d6adf0fd4c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:39:17 -0700 Subject: anv/blit2d: Fix a NIR writemask Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 10e9ba3befd..144a62481b8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -431,7 +431,7 @@ build_nir_copy_fragment_shader() nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 4); + nir_store_var(&b, color_out, &tex->dest.ssa, 0xf); return b.shader; } -- cgit v1.2.3 From afada45590789191e96860851df4696170e4231c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Mar 2016 14:06:32 -0700 Subject: anv/blit2d: Fix whitespace Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 144a62481b8..6a7845fd30f 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -131,11 +131,11 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D extent) + struct anv_image_view *src_iview, + VkOffset3D src_offset, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D extent) { struct anv_device 
*device = cmd_buffer->device; @@ -348,11 +348,11 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* Perform blit */ meta_emit_blit2d(cmd_buffer, - &src_iview, - (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, - &dst_iview, - (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, - (VkExtent3D){rects[r].width, rects[r].height, 1}); + &src_iview, + (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, + &dst_iview, + (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, + (VkExtent3D){rects[r].width, rects[r].height, 1}); anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); -- cgit v1.2.3 From e466164c87b95e5a5ef6263ad3f62c37d37b9488 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 15:37:47 -0700 Subject: anv/blit2d: Break the texelfetch portion of shader building into a helper Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 59 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 23 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 6a7845fd30f..d3b0adce641 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -389,49 +389,62 @@ build_nir_vertex_shader(void) return b.shader; } -static nir_shader * -build_nir_copy_fragment_shader() -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec2, "v_tex_pos"); - tex_pos_in->data.location = VARYING_SLOT_VAR0; - nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); +typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *, + struct anv_device *, + nir_ssa_def *, nir_ssa_def *); +static nir_ssa_def * +build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, + nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) +{ const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, - glsl_get_base_type(vec4)); - nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); + nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); sampler->data.descriptor_set = 0; sampler->data.binding = 0; - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2); tex->sampler_dim = GLSL_SAMPLER_DIM_2D; tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); tex->dest_type = nir_type_float; /* TODO */ tex->is_array = false; - tex->coord_components = tex_pos->num_components; + tex->coord_components = 2; tex->texture = nir_deref_var_create(tex, sampler); tex->sampler = NULL; nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); - nir_builder_instr_insert(&b, &tex->instr); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + +static nir_shader * +build_nir_copy_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct 
glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec2, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 0xf); + + nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + nir_ssa_def *color = txf_func(&b, device, tex_pos, NULL); + nir_store_var(&b, color_out, color, 0xf); return b.shader; } @@ -501,7 +514,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }; struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(), + .nir = build_nir_copy_fragment_shader(device, build_nir_texel_fetch), }; VkPipelineVertexInputStateCreateInfo vi_create_info = { -- cgit v1.2.3 From b0a6cfb9b46dcd6b1c60d85c0c4b4ca119d53e5c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 29 Mar 2016 14:24:31 -0700 Subject: anv/blit2d: Pass the source pitch into the shader Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index d3b0adce641..a78536f4e52 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -132,6 +132,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, static void meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_image_view *src_iview, + uint32_t src_pitch, VkOffset3D src_offset, struct anv_image_view *dest_iview, VkOffset3D dest_offset, @@ -159,7 +160,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { src_offset.x + extent.width, src_offset.y + extent.height, - src_offset.z, + src_pitch, }, }; @@ -171,7 +172,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { src_offset.x, src_offset.y + extent.height, - src_offset.z, + src_pitch, }, }; @@ -183,7 +184,7 @@ meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, .tex_coord = { src_offset.x, src_offset.y, - src_offset.z, + src_pitch, }, }; @@ -348,7 +349,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* Perform blit */ meta_emit_blit2d(cmd_buffer, - &src_iview, + &src_iview, src->pitch, (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, &dst_iview, (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, @@ -428,22 +429,26 @@ build_nir_copy_fragment_shader(struct anv_device *device, texel_fetch_build_func txf_func) { const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); + const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec2, "v_tex_pos"); + vec3, "v_tex_pos"); tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, 
tex_pos_in)); - nir_ssa_def *color = txf_func(&b, device, tex_pos, NULL); + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + unsigned swiz[4] = { 0, 1 }; + nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); + nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); nir_store_var(&b, color_out, color, 0xf); return b.shader; -- cgit v1.2.3 From 5187ab05b8f0719b0ecb922e36c5d5c3d118ea31 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 11:51:50 -0700 Subject: anv/blit2d: Inline meta_emit_blit2d Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 361 +++++++++++++++++-------------------- 1 file changed, 170 insertions(+), 191 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index a78536f4e52..fc72f7808f8 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -129,190 +129,6 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer, img_o, usage); } -static void -meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - uint32_t src_pitch, - VkOffset3D src_offset, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D extent) -{ - struct anv_device *device = cmd_buffer->device; - - struct blit_vb_data { - float pos[2]; - float tex_coord[3]; - } *vb_data; - - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct anv_vue_header)); - vb_data = vb_state.map + sizeof(struct anv_vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + extent.width, - dest_offset.y + extent.height, - }, - .tex_coord = { - src_offset.x + extent.width, - src_offset.y + extent.height, - src_pitch, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + extent.height, - }, - .tex_coord = { - src_offset.x, - src_offset.y + extent.height, - src_pitch, - }, - }; - - vb_data[2] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y, - }, - .tex_coord = { - src_offset.x, - src_offset.y, - src_pitch, - }, - }; - - anv_state_clflush(vb_state); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = vb_state.offset, - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct anv_vue_header), - }); - - VkDescriptorPool desc_pool; - anv_CreateDescriptorPool(anv_device_to_handle(device), - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { - { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &desc_pool); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(anv_device_to_handle(device), - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout - }, &set); - - 
anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = NULL, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(dest_iview), - }, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit2d.render_pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { extent.width, extent.height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, VK_SUBPASS_CONTENTS_INLINE); - - VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } - - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, - &set, 0, NULL); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. 
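The comment being deleted here states the invariant that makes this per-rect pattern safe in this driver: descriptor contents are consumed as the commands are recorded. Condensed to a skeleton (core Vulkan names; the info structs and handles are elided):

VkDescriptorPool pool;
vkCreateDescriptorPool(dev, &pool_info, alloc, &pool);   /* maxSets = 1 */

VkDescriptorSet set;
vkAllocateDescriptorSets(dev, &set_alloc_info, &set);
vkUpdateDescriptorSets(dev, 1, &write, 0, NULL);

vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                        layout, 0, 1, &set, 0, NULL);
vkCmdDraw(cmd, 3, 1, 0, 0);

/* Valid at record time only because anv bakes the descriptor data
 * into the command stream as it records; portable Vulkan code must
 * keep the pool alive until the command buffer finishes executing.
 */
vkDestroyDescriptorPool(dev, pool, alloc);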
- */ - anv_DestroyDescriptorPool(anv_device_to_handle(device), - desc_pool, &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb, - &cmd_buffer->pool->alloc); -} - void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -335,6 +151,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, unsigned num_rects, struct anv_meta_blit2d_rect *rects) { + struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -347,13 +164,175 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); - /* Perform blit */ - meta_emit_blit2d(cmd_buffer, - &src_iview, src->pitch, - (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, - &dst_iview, - (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, - (VkExtent3D){rects[r].width, rects[r].height, 1}); + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x + rects[r].width, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x, + rects[r].dst_y, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y, + src->pitch, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkDescriptorPool desc_pool; + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &desc_pool); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.ds_layout + }, &set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { 
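/* (Aside: .sampler stays NULL and the image layout is GENERAL because the
 * blit2d fragment shader reads with nir_texop_txf, i.e. texelFetch(): it
 * addresses texels by integer coordinate, so no sampler object or filtering
 * state is involved.)
 */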
+ { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(vk_device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dst_iview), + }, + .width = dst_iview.extent.width, + .height = dst_iview.extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { rects[r].dst_x, rects[r].dst_y, }, + .extent = { rects[r].width, rects[r].height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dst_iview.extent.width, + .height = dst_iview.extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + anv_DestroyDescriptorPool(vk_device, desc_pool, &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); -- cgit v1.2.3 From 43fbdd7156197518c2f2f8a1c0befa7f4ffd16e1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:32:08 -0700 Subject: anv/blit2d: Factor binding the source image into a helper Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 139 ++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 57 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index fc72f7808f8..7424a00b7f2 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -129,6 +129,84 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, }, cmd_buffer, img_o, usage); } +struct blit2d_src_temps { + VkImage image; + struct anv_image_view iview; + VkDescriptorPool desc_pool; + VkDescriptorSet set; +}; + +static void +blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_rect *rect, + struct blit2d_src_temps *tmp) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, + &tmp->image, &tmp->iview); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.pipeline_layout, 0, 1, + &tmp->set, 0, NULL); +} + +static void +blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, + struct blit2d_src_temps *tmp) +{ + anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), + tmp->desc_pool, &cmd_buffer->pool->alloc); + anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), + tmp->image, &cmd_buffer->pool->alloc); +} + void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -153,15 +231,14 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, { struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for 
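/* (Aside: blit2d_src_temps, introduced above, bundles everything that
 * binding a source creates: the temporary VkImage, its image view, the
 * one-shot descriptor pool, and the descriptor set. With the bind/unbind
 * pair hiding those details, later patches in this series can substitute a
 * buffer plus buffer-view source for W-tiled images without touching the
 * per-rect loop body.)
 */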
(unsigned r = 0; r < num_rects; ++r) { - VkImage src_img; + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, &rects[r], &src_temps); + VkImage dst_img; - struct anv_image_view src_iview; struct anv_image_view dst_iview; - create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); struct blit_vb_data { @@ -231,51 +308,6 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); - VkDescriptorPool desc_pool; - anv_CreateDescriptorPool(vk_device, - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { - { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &desc_pool); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(vk_device, - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout - }, &set); - - anv_UpdateDescriptorSets(vk_device, - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = NULL, - .imageView = anv_image_view_to_handle(&src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - VkFramebuffer fb; anv_CreateFramebuffer(vk_device, &(VkFramebufferCreateInfo) { @@ -319,11 +351,6 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .maxDepth = 1.0f, }); - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, - &set, 0, NULL); - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); @@ -331,10 +358,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
*/ - anv_DestroyDescriptorPool(vk_device, desc_pool, &cmd_buffer->pool->alloc); + blit2d_unbind_src(cmd_buffer, &src_temps); anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); - - anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); } } -- cgit v1.2.3 From 00e70868ee81f964bbdb33b40938eec7e4bef1f3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:47:41 -0700 Subject: anv/blit2d: Enhance teardown and clean up init error paths Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 117 ++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 60 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 7424a00b7f2..3dc0c66d1cc 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -461,18 +461,29 @@ build_nir_copy_fragment_shader(struct anv_device *device, void anv_device_finish_meta_blit2d_state(struct anv_device *device) { - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit2d.render_pass, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, - &device->meta_state.alloc); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, - &device->meta_state.alloc); + if (device->meta_state.blit2d.render_pass) { + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit2d.render_pass, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.pipeline_2d_src) { + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit2d.pipeline_2d_src, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.pipeline_layout) { + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit2d.pipeline_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.ds_layout) { + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit2d.ds_layout, + &device->meta_state.alloc); + } } VkResult @@ -480,6 +491,8 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) { VkResult result; + zero(device->meta_state.blit2d); + result = anv_CreateRenderPass(anv_device_to_handle(device), &(VkRenderPassCreateInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, @@ -513,6 +526,33 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) if (result != VK_SUCCESS) goto fail; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.ds_layout, + }, + &device->meta_state.alloc, 
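/* (Aside on the error-handling pattern this patch introduces:
 * zero(device->meta_state.blit2d) runs before anything is created, every
 * handle is NULL-checked in anv_device_finish_meta_blit2d_state(), and all
 * failure paths collapse to a single label,
 *
 *    fail:
 *       anv_device_finish_meta_blit2d_state(device);
 *       return result;
 *
 * so partially initialized state always tears down safely.)
 */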
&device->meta_state.blit2d.pipeline_layout); + if (result != VK_SUCCESS) + goto fail; + /* We don't use a vertex shader for blitting, but instead build and pass * the VUEs directly to the rasterization backend. However, we do need * to provide GLSL source for the vertex shader so that the compiler @@ -567,36 +607,6 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) } }; - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, - &device->meta_state.alloc, - &device->meta_state.blit2d.ds_layout); - if (result != VK_SUCCESS) - goto fail_render_pass; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); - if (result != VK_SUCCESS) - goto fail_descriptor_set_layout; - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -687,29 +697,16 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; ralloc_free(vs.nir); ralloc_free(fs_2d.nir); - return VK_SUCCESS; + if (result != VK_SUCCESS) + goto fail; - fail_pipeline_layout: - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, - &device->meta_state.alloc); - fail_descriptor_set_layout: - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, - &device->meta_state.alloc); - fail_render_pass: - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit2d.render_pass, - &device->meta_state.alloc); + return VK_SUCCESS; - ralloc_free(vs.nir); - ralloc_free(fs_2d.nir); - fail: +fail: + anv_device_finish_meta_blit2d_state(device); return result; } -- cgit v1.2.3 From 28eb02e345c5642d49037759b5b0eee8d71e7feb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:51:21 -0700 Subject: anv/blit2d: Rename the descriptor set and pipeline layouts Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 20 ++++++++++---------- src/intel/vulkan/anv_private.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 3dc0c66d1cc..878ae3f096e 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -168,7 +168,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = tmp->desc_pool, .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout + .pSetLayouts = &device->meta_state.blit2d.image_ds_layout }, &tmp->set); anv_UpdateDescriptorSets(vk_device, @@ -193,7 +193,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, 
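/* (Aside: the rename below prepares for a second descriptor interface. The
 * sampled-image path becomes img_ds_layout/img_p_layout, and the following
 * patch adds matching buf_ds_layout/buf_p_layout for a texel-buffer source,
 * so each pipeline can be keyed by the kind of source it reads.)
 */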
anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, + device->meta_state.blit2d.image_p_layout, 0, 1, &tmp->set, 0, NULL); } @@ -473,15 +473,15 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) &device->meta_state.alloc); } - if (device->meta_state.blit2d.pipeline_layout) { + if (device->meta_state.blit2d.image_p_layout) { anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, + device->meta_state.blit2d.image_p_layout, &device->meta_state.alloc); } - if (device->meta_state.blit2d.ds_layout) { + if (device->meta_state.blit2d.image_ds_layout) { anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, + device->meta_state.blit2d.image_ds_layout, &device->meta_state.alloc); } } @@ -539,7 +539,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pImmutableSamplers = NULL }, } - }, &device->meta_state.alloc, &device->meta_state.blit2d.ds_layout); + }, &device->meta_state.alloc, &device->meta_state.blit2d.image_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -547,9 +547,9 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout, + .pSetLayouts = &device->meta_state.blit2d.image_ds_layout, }, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); + &device->meta_state.alloc, &device->meta_state.blit2d.image_p_layout); if (result != VK_SUCCESS) goto fail; @@ -678,7 +678,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit2d.pipeline_layout, + .layout = device->meta_state.blit2d.image_p_layout, .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 48ebff456e3..9e3978a4691 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -612,8 +612,8 @@ struct anv_meta_state { /** Pipeline that copies from a 2D image. 
*/ VkPipeline pipeline_2d_src; - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; + VkPipelineLayout image_p_layout; + VkDescriptorSetLayout image_ds_layout; } blit2d; struct { -- cgit v1.2.3 From 85b9a007acb9bf53e509974f4112accb8e9a29f4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 13:54:56 -0700 Subject: anv/blit2d: Add layouts for using a texel buffer source Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 59 +++++++++++++++++++++++++++++++------- src/intel/vulkan/anv_private.h | 6 ++-- 2 files changed, 53 insertions(+), 12 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 878ae3f096e..22b763aee2d 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -168,7 +168,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = tmp->desc_pool, .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.image_ds_layout + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout }, &tmp->set); anv_UpdateDescriptorSets(vk_device, @@ -193,7 +193,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.image_p_layout, 0, 1, + device->meta_state.blit2d.img_p_layout, 0, 1, &tmp->set, 0, NULL); } @@ -473,15 +473,27 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) &device->meta_state.alloc); } - if (device->meta_state.blit2d.image_p_layout) { + if (device->meta_state.blit2d.img_p_layout) { anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.image_p_layout, + device->meta_state.blit2d.img_p_layout, &device->meta_state.alloc); } - if (device->meta_state.blit2d.image_ds_layout) { + if (device->meta_state.blit2d.img_ds_layout) { anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.image_ds_layout, + device->meta_state.blit2d.img_ds_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.buf_p_layout) { + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit2d.buf_p_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.buf_ds_layout) { + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit2d.buf_ds_layout, &device->meta_state.alloc); } } @@ -539,7 +551,34 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .pImmutableSamplers = NULL }, } - }, &device->meta_state.alloc, &device->meta_state.blit2d.image_ds_layout); + }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + 
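/* (Aside: this second set layout is the texel-buffer flavor of the source
 * binding: one UNIFORM_TEXEL_BUFFER descriptor in place of the
 * SAMPLED_IMAGE one above it. It exists for sources the sampler cannot read
 * directly; per the blit2d_src_type comment added later in the series,
 * W-tiled sources prior to Broadwell must be detiled manually in the
 * shader, and those are fetched through a buffer view bound with this
 * layout.)
 */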
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); if (result != VK_SUCCESS) goto fail; @@ -547,9 +586,9 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) &(VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.image_ds_layout, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, }, - &device->meta_state.alloc, &device->meta_state.blit2d.image_p_layout); + &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); if (result != VK_SUCCESS) goto fail; @@ -678,7 +717,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit2d.image_p_layout, + .layout = device->meta_state.blit2d.img_p_layout, .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9e3978a4691..7c140a33cb7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -612,8 +612,10 @@ struct anv_meta_state { /** Pipeline that copies from a 2D image. */ VkPipeline pipeline_2d_src; - VkPipelineLayout image_p_layout; - VkDescriptorSetLayout image_ds_layout; + VkPipelineLayout img_p_layout; + VkDescriptorSetLayout img_ds_layout; + VkPipelineLayout buf_p_layout; + VkDescriptorSetLayout buf_ds_layout; } blit2d; struct { -- cgit v1.2.3 From 4ee80e8816091869943d98cbe261c49406bb8039 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 15:03:34 -0700 Subject: anv/blit2d: Refactor in preparation for different src/dst types Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 349 ++++++++++++++++++++++++------------- src/intel/vulkan/anv_private.h | 9 +- 2 files changed, 238 insertions(+), 120 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 22b763aee2d..8e63eee462d 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -24,6 +24,44 @@ #include "anv_meta.h" #include "nir/nir_builder.h" +enum blit2d_src_type { + /* We can make a "normal" image view of this source and just texture + * from it like you would in any other shader. + */ + BLIT2D_SRC_TYPE_NORMAL, + + /* The source is W-tiled and we need to detile manually in the shader. + * This will work on any platform but is needed for all W-tiled sources + * prior to Broadwell. + */ + BLIT2D_SRC_TYPE_W_DETILE, + + BLIT2D_NUM_SRC_TYPES, +}; + +enum blit2d_dst_type { + /* We can bind this destination as a "normal" render target and render + * to it just like you would anywhere else. + */ + BLIT2D_DST_TYPE_NORMAL, + + /* The destination is W-tiled and we need to do the tiling manually in + * the shader. This is required for all W-tiled destinations. + * + * Sky Lake adds a feature for providing explicit stencil values in the + * shader but mesa doesn't support that yet so neither do we. + */ + BLIT2D_DST_TYPE_W_TILE, + + /* The destination has a 3-channel RGB format. Since we can't render to + * non-power-of-two textures, we have to bind it as a red texture and + * select the correct component for the given red pixel in the shader. 
+ */ + BLIT2D_DST_TYPE_RGB, + + BLIT2D_NUM_DST_TYPES, +}; + static VkFormat vk_format_for_size(int bs) { @@ -139,6 +177,7 @@ struct blit2d_src_temps { static void blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, struct anv_meta_blit2d_rect *rect, struct blit2d_src_temps *tmp) { @@ -199,6 +238,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, static void blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, + enum blit2d_src_type src_type, struct blit2d_src_temps *tmp) { anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), @@ -222,12 +262,27 @@ anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT)); } -void -anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_blit2d_surf *src, - struct anv_meta_blit2d_surf *dst, - unsigned num_rects, - struct anv_meta_blit2d_rect *rects) +static void +bind_pipeline(struct anv_cmd_buffer *cmd_buffer, + enum blit2d_src_type src_type, + enum blit2d_dst_type dst_type) +{ + VkPipeline pipeline = + cmd_buffer->device->meta_state.blit2d.pipelines[src_type][dst_type]; + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } +} + +static void +anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) { struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); @@ -235,7 +290,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, for (unsigned r = 0; r < num_rects; ++r) { struct blit2d_src_temps src_temps; - blit2d_bind_src(cmd_buffer, src, &rects[r], &src_temps); + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); VkImage dst_img; struct anv_image_view dst_iview; @@ -334,12 +389,7 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, .pClearValues = NULL, }, VK_SUBPASS_CONTENTS_INLINE); - VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL); anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { @@ -358,12 +408,39 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, /* At the point where we emit the draw call, all data from the * descriptor sets, etc. has been used. We are free to delete it. 
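 *
 * (What outlives this record is the GPU-side state: create_iview() hands
 * the command buffer to anv_image_view_init(), so the view's surface state
 * is presumably allocated with command-buffer lifetime. The destroys below
 * therefore release host-side objects only.)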
*/ - blit2d_unbind_src(cmd_buffer, &src_temps); + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); } } +void +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + enum blit2d_src_type src_type; + if (src->tiling == ISL_TILING_W && cmd_buffer->device->info.gen < 8) { + src_type = BLIT2D_SRC_TYPE_W_DETILE; + } else { + src_type = BLIT2D_SRC_TYPE_NORMAL; + } + + if (dst->tiling == ISL_TILING_W) { + assert(dst->bs == 1); + anv_finishme("Blitting to w-tiled destinations not yet supported"); + return; + } else if (dst->bs % 3 == 0) { + anv_finishme("Blitting to RGB destinations not yet supported"); + return; + } else { + assert(util_is_power_of_two(dst->bs)); + anv_meta_blit2d_normal_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); + } +} static nir_shader * build_nir_vertex_shader(void) @@ -467,12 +544,6 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) &device->meta_state.alloc); } - if (device->meta_state.blit2d.pipeline_2d_src) { - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_2d_src, - &device->meta_state.alloc); - } - if (device->meta_state.blit2d.img_p_layout) { anv_DestroyPipelineLayout(anv_device_to_handle(device), device->meta_state.blit2d.img_p_layout, @@ -496,101 +567,47 @@ anv_device_finish_meta_blit2d_state(struct anv_device *device) device->meta_state.blit2d.buf_ds_layout, &device->meta_state.alloc); } + + for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { + for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) { + if (device->meta_state.blit2d.pipelines[src][dst]) { + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit2d.pipelines[src][dst], + &device->meta_state.alloc); + } + } + } } -VkResult -anv_device_init_meta_blit2d_state(struct anv_device *device) +static VkResult +blit2d_init_pipeline(struct anv_device *device, + enum blit2d_src_type src_type, + enum blit2d_dst_type dst_type) { VkResult result; - zero(device->meta_state.blit2d); - - result = anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); - if (result != VK_SUCCESS) - goto fail; - - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &(VkDescriptorSetLayoutCreateInfo) { - .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); - if (result != VK_SUCCESS) - goto fail; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); - if (result != VK_SUCCESS) - goto fail; - - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); - if (result != VK_SUCCESS) - goto fail; + texel_fetch_build_func src_func; + switch (src_type) { + case BLIT2D_SRC_TYPE_NORMAL: + src_func = build_nir_texel_fetch; + break; + case BLIT2D_SRC_TYPE_W_DETILE: + /* Not yet supported */ + default: + return VK_SUCCESS; + } - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); - if (result != VK_SUCCESS) - goto fail; + struct anv_shader_module fs = { .nir = NULL }; + switch (dst_type) { + case BLIT2D_DST_TYPE_NORMAL: + fs.nir = build_nir_copy_fragment_shader(device, src_func); + break; + case BLIT2D_DST_TYPE_W_TILE: + case BLIT2D_DST_TYPE_RGB: + /* Not yet supported */ + default: + return VK_SUCCESS; + } /* We don't use a vertex shader for blitting, but instead build and pass * the VUEs directly to the rasterization backend. However, we do need @@ -601,10 +618,6 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .nir = build_nir_vertex_shader(), }; - struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(device, build_nir_texel_fetch), - }; - VkPipelineVertexInputStateCreateInfo vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 2, @@ -656,7 +669,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ + .module = anv_shader_module_to_handle(&fs), .pName = "main", .pSpecializationInfo = NULL }, @@ -731,18 +744,120 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .use_rectlist = true }; - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); + &device->meta_state.alloc, + &device->meta_state.blit2d.pipelines[src_type][dst_type]); ralloc_free(vs.nir); - ralloc_free(fs_2d.nir); + ralloc_free(fs.nir); + + return result; +} + +VkResult +anv_device_init_meta_blit2d_state(struct anv_device *device) +{ + VkResult result; + zero(device->meta_state.blit2d); + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); if (result != VK_SUCCESS) goto fail; + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + 
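/* (Aside: once the shared layouts are created, init walks the full
 * source-type x destination-type matrix through blit2d_init_pipeline().
 * Combinations that are not implemented yet simply return VK_SUCCESS
 * without creating a pipeline, leaving a NULL entry in
 * meta_state.blit2d.pipelines[src][dst] that the NULL-checked teardown
 * handles for free.)
 */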
.pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); + if (result != VK_SUCCESS) + goto fail; + + for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { + for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) { + result = blit2d_init_pipeline(device, src, dst); + if (result != VK_SUCCESS) + goto fail; + } + } + return VK_SUCCESS; fail: diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7c140a33cb7..1a18dd15e65 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -609,13 +609,16 @@ struct anv_meta_state { struct { VkRenderPass render_pass; - /** Pipeline that copies from a 2D image. */ - VkPipeline pipeline_2d_src; - VkPipelineLayout img_p_layout; VkDescriptorSetLayout img_ds_layout; VkPipelineLayout buf_p_layout; VkDescriptorSetLayout buf_ds_layout; + + /* Pipelines indexed by source and destination type. See the + * blit2d_src_type and blit2d_dst_type enums in anv_meta_blit2d.c to + * see what these mean. + */ + VkPipeline pipelines[2][3]; } blit2d; struct { -- cgit v1.2.3 From 4caba940869602b750e21a444523b068b1bea339 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 15:20:11 -0700 Subject: anv/image: Expose the guts of CreateBufferView for meta Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_image.c | 40 ++++++++++++++++++++++++---------------- src/intel/vulkan/anv_private.h | 5 +++++ 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 266fbe73ddc..759c8612005 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -646,20 +646,13 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, anv_free2(&device->alloc, pAllocator, iview); } -VkResult -anv_CreateBufferView(VkDevice _device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBufferView *pView) + +void anv_buffer_view_init(struct anv_buffer_view *view, + struct anv_device *device, + const VkBufferViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) { - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *view; - - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!view) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); @@ -671,8 +664,7 @@ anv_CreateBufferView(VkDevice _device, buffer->size - view->offset : pCreateInfo->range; if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_buffer_surface_state(device, view->surface_state, view->format, @@ -683,8 +675,7 @@ anv_CreateBufferView(VkDevice _device, } if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - view->storage_surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->storage_surface_state = alloc_surface_state(device, cmd_buffer); enum isl_format storage_format = has_matching_storage_typed_format(device, view->format) ? 
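/* (Aside: the point of exposing anv_buffer_view_init() with a cmd_buffer
 * parameter is lifetime. When a command buffer is passed,
 * alloc_surface_state(device, cmd_buffer) presumably pulls the surface
 * state from the command buffer's own state stream, so meta code can make
 * transient buffer views; vkCreateBufferView keeps passing NULL and
 * allocates from the device's surface_state_pool as before.)
 */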
@@ -703,6 +694,23 @@ anv_CreateBufferView(VkDevice _device, } else { view->storage_surface_state = (struct anv_state){ 0 }; } +} + +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_buffer_view_init(view, device, pCreateInfo, NULL); *pView = anv_buffer_view_to_handle(view); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1a18dd15e65..a394fe8a683 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1682,6 +1682,11 @@ struct anv_buffer_view { struct brw_image_param storage_image_param; }; +void anv_buffer_view_init(struct anv_buffer_view *view, + struct anv_device *device, + const VkBufferViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + const struct anv_format * anv_format_for_descriptor_type(VkDescriptorType type); -- cgit v1.2.3 From b37502b9832f02626b0caca22500b46ebbbe8007 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 16:15:54 -0700 Subject: isl: Rework the get_intratile_offset function The old function tried to work in elements which isn't, strictly speaking, a valid thing to do. In the case of a non-power-of-two format, there is no guarantee that the x offset into the tile is a multiple of the format block size. This commit refactors it to work entirely in terms of a tiling (not a surface) and bytes/rows. Reviewed-by: Nanley Chery --- src/intel/isl/isl.c | 52 +++++++++++++++++++++++--------------- src/intel/isl/isl.h | 33 +++++++++++++++--------- src/intel/vulkan/anv_meta_blit2d.c | 13 ++++++---- 3 files changed, 60 insertions(+), 38 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index a36638071d5..37d8bcba078 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1417,33 +1417,39 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, } void -isl_surf_get_image_intratile_offset_el_xy(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t total_x_offset_el, - uint32_t total_y_offset_el, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el) +isl_tiling_get_intratile_offset_el(const struct isl_device *dev, + enum isl_tiling tiling, + uint8_t bs, + uint32_t row_pitch, + uint32_t total_x_offset_el, + uint32_t total_y_offset_el, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) { - const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - struct isl_tile_info tile_info; - isl_surf_get_tile_info(dev, surf, &tile_info); + isl_tiling_get_info(dev, tiling, bs, &tile_info); + + /* This function only really works for power-of-two surfaces. In + * theory, we could make it work for non-power-of-two surfaces by going + * to the left until we find a block that is bs-aligned. The Vulkan + * driver doesn't use non-power-of-two tiled surfaces so we'll leave + * this unimplemented for now. 
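 *
 * (A sketch of the decomposition the code below performs, with tile width
 * and size in bytes and tile height in rows:
 *
 *    small_y = y % tile_h;             big_y_B = (y - small_y) * row_pitch;
 *    x_B     = x_el * bs;
 *    small_x = (x_B % tile_w_B) / bs;  big_x_B = (x_B / tile_w_B) * tile_size;
 *    base_address_offset = big_y_B + big_x_B;
 *
 * whole tiles fold into the returned byte offset and the remainders become
 * the intratile x/y element offsets.)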
+ */ + assert(tiling == ISL_TILING_LINEAR || isl_is_pow2(bs)); uint32_t small_y_offset_el = total_y_offset_el % tile_info.height; uint32_t big_y_offset_el = total_y_offset_el - small_y_offset_el; - uint32_t big_y_offset_B = big_y_offset_el * surf->row_pitch; + uint32_t big_y_offset_B = big_y_offset_el * row_pitch; - uint32_t total_x_offset_B = total_x_offset_el * fmtl->bs; + uint32_t total_x_offset_B = total_x_offset_el * bs; uint32_t small_x_offset_B = total_x_offset_B % tile_info.width; - uint32_t small_x_offset_el = small_x_offset_B / fmtl->bs; + uint32_t small_x_offset_el = small_x_offset_B / bs; uint32_t big_x_offset_B = (total_x_offset_B / tile_info.width) * tile_info.size; *base_address_offset = big_y_offset_B + big_x_offset_B; *x_offset_el = small_x_offset_el; *y_offset_el = small_y_offset_el; - - } void @@ -1456,6 +1462,8 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_el, uint32_t *y_offset_el) { + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + uint32_t total_x_offset_el; uint32_t total_y_offset_el; isl_surf_get_image_offset_el(surf, level, @@ -1464,12 +1472,14 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, &total_x_offset_el, &total_y_offset_el); - isl_surf_get_image_intratile_offset_el_xy(dev, surf, - total_x_offset_el, - total_y_offset_el, - base_address_offset, - x_offset_el, - y_offset_el); + + isl_tiling_get_intratile_offset_el(dev, surf->tiling, fmtl->bs, + surf->row_pitch, + total_x_offset_el, + total_y_offset_el, + base_address_offset, + x_offset_el, + y_offset_el); } uint32_t diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 90193ca08c1..4f796f6c6a8 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1142,6 +1142,27 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, uint32_t *x_offset_el, uint32_t *y_offset_el); +/** + * @brief Calculate the intratile offsets to a surface. + * + * In @a base_address_offset return the offset from the base of the surface to + * the base address of the first tile of the subimage. In @a x_offset_B and + * @a y_offset_rows, return the offset, in units of bytes and rows, from the + * tile's base to the subimage's first surface element. The x and y offsets + * are intratile offsets; that is, they do not exceed the boundary of the + * surface's tiling format. + */ +void +isl_tiling_get_intratile_offset_el(const struct isl_device *dev, + enum isl_tiling tiling, + uint8_t bs, + uint32_t row_pitch, + uint32_t total_x_offset_B, + uint32_t total_y_offset_rows, + uint32_t *base_address_offset, + uint32_t *x_offset_B, + uint32_t *y_offset_rows); + /** * @brief Calculate the intratile offsets to a subimage in the surface. * @@ -1162,18 +1183,6 @@ isl_surf_get_image_intratile_offset_el(const struct isl_device *dev, uint32_t *x_offset_el, uint32_t *y_offset_el); -/** - * See above. 
- */ -void -isl_surf_get_image_intratile_offset_el_xy(const struct isl_device *dev, - const struct isl_surf *surf, - uint32_t total_x_offset_el, - uint32_t total_y_offset_el, - uint32_t *base_address_offset, - uint32_t *x_offset_el, - uint32_t *y_offset_el); - /** * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat * diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 8e63eee462d..cf2dc66597d 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -145,12 +145,15 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, /* Create a VkImageView that starts at the tile aligned offset closest * to the provided x/y offset into the surface. */ + struct isl_surf *isl_surf = &anv_image_from_handle(*img)->color_surface.isl; + uint32_t img_o = 0; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(*img)-> - color_surface.isl, - *rect_x, *rect_y, - &img_o, rect_x, rect_y); + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + isl_surf->tiling, surf->bs, + isl_surf->row_pitch, + *rect_x * surf->bs, *rect_y, + &img_o, rect_x, rect_y); + anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, -- cgit v1.2.3 From 819d0e1a7c06e98cfe82f687e30dcbe6b974a1e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 16:52:56 -0700 Subject: anv/meta2d: Add support for blitting from W-tiled sources on gen7 Reviewed-by: Nanley Chery Reviewed-by: Chad Versace --- src/intel/vulkan/anv_meta_blit2d.c | 253 ++++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 49 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index cf2dc66597d..c3bf4152902 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -173,6 +173,10 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, struct blit2d_src_temps { VkImage image; struct anv_image_view iview; + + struct anv_buffer buffer; + struct anv_buffer_view bview; + VkDescriptorPool desc_pool; VkDescriptorSet set; }; @@ -187,56 +191,130 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, - &tmp->image, &tmp->iview); - - anv_CreateDescriptorPool(vk_device, - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { + if (src_type == BLIT2D_SRC_TYPE_NORMAL) { + create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, + &tmp->image, &tmp->iview); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout + }, &tmp->set); + + 
anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &tmp->desc_pool); - - anv_AllocateDescriptorSets(vk_device, - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = tmp->desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.img_ds_layout - }, &tmp->set); - - anv_UpdateDescriptorSets(vk_device, - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = tmp->set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.img_p_layout, 0, 1, + &tmp->set, 0, NULL); + } else { + assert(src_type == BLIT2D_SRC_TYPE_W_DETILE); + assert(src->tiling == ISL_TILING_W); + assert(src->bs == 1); + + uint32_t tile_offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + ISL_TILING_W, 1, src->pitch, + rect->src_x, rect->src_y, + &tile_offset, + &rect->src_x, &rect->src_y); + + tmp->buffer = (struct anv_buffer) { + .device = device, + .size = align_u32(rect->src_y + rect->height, 64) * src->pitch, + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + .bo = src->bo, + .offset = src->base_offset + tile_offset, + }; + + anv_buffer_view_init(&tmp->bview, device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = anv_buffer_to_handle(&tmp->buffer), + .format = VK_FORMAT_R8_UINT, + .offset = 0, + .range = VK_WHOLE_SIZE, + }, cmd_buffer); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { { - .sampler = NULL, - .imageView = anv_image_view_to_handle(&tmp->iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1 }, } - } - }, 0, NULL); + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]) { + anv_buffer_view_to_handle(&tmp->bview), + }, + } + }, 0, NULL); - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.img_p_layout, 0, 
1, - &tmp->set, 0, NULL); + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.buf_p_layout, 0, 1, + &tmp->set, 0, NULL); + } } static void @@ -246,8 +324,10 @@ blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, { anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), tmp->desc_pool, &cmd_buffer->pool->alloc); - anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), - tmp->image, &cmd_buffer->pool->alloc); + if (src_type == BLIT2D_SRC_TYPE_NORMAL) { + anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), + tmp->image, &cmd_buffer->pool->alloc); + } } void @@ -478,6 +558,80 @@ typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *, struct anv_device *, nir_ssa_def *, nir_ssa_def *); +static nir_ssa_def * +nir_copy_bits(struct nir_builder *b, nir_ssa_def *dst, unsigned dst_offset, + nir_ssa_def *src, unsigned src_offset, unsigned num_bits) +{ + unsigned src_mask = (~1u >> (32 - num_bits)) << src_offset; + nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask)); + + nir_ssa_def *shifted; + if (dst_offset > src_offset) { + shifted = nir_ishl(b, masked, nir_imm_int(b, dst_offset - src_offset)); + } else if (dst_offset < src_offset) { + shifted = nir_ushr(b, masked, nir_imm_int(b, src_offset - dst_offset)); + } else { + assert(dst_offset == src_offset); + shifted = masked; + } + + return nir_ior(b, dst, shifted); +} + +static nir_ssa_def * +build_nir_w_tiled_fetch(struct nir_builder *b, struct anv_device *device, + nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) +{ + nir_ssa_def *x = nir_channel(b, tex_pos, 0); + nir_ssa_def *y = nir_channel(b, tex_pos, 1); + + /* First, compute the block-aligned offset */ + nir_ssa_def *x_major = nir_ushr(b, x, nir_imm_int(b, 6)); + nir_ssa_def *y_major = nir_ushr(b, y, nir_imm_int(b, 6)); + nir_ssa_def *offset = + nir_iadd(b, nir_imul(b, y_major, + nir_imul(b, tex_pitch, nir_imm_int(b, 64))), + nir_imul(b, x_major, nir_imm_int(b, 4096))); + + /* Compute the bottom 12 bits of the offset */ + offset = nir_copy_bits(b, offset, 0, x, 0, 1); + offset = nir_copy_bits(b, offset, 1, y, 0, 1); + offset = nir_copy_bits(b, offset, 2, x, 1, 1); + offset = nir_copy_bits(b, offset, 3, y, 1, 1); + offset = nir_copy_bits(b, offset, 4, x, 2, 1); + offset = nir_copy_bits(b, offset, 5, y, 2, 4); + offset = nir_copy_bits(b, offset, 9, x, 3, 3); + + if (device->isl_dev.has_bit6_swizzling) { + offset = nir_ixor(b, offset, + nir_ushr(b, nir_iand(b, offset, nir_imm_int(b, 0x0200)), + nir_imm_int(b, 3))); + } + + const struct glsl_type *sampler_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); + nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; + tex->op = nir_texop_txf; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(offset); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = false; + tex->coord_components = 1; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = NULL; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + static nir_ssa_def * build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) @@ -595,9 +749,10 @@ 
blit2d_init_pipeline(struct anv_device *device, src_func = build_nir_texel_fetch; break; case BLIT2D_SRC_TYPE_W_DETILE: - /* Not yet supported */ + src_func = build_nir_w_tiled_fetch; + break; default: - return VK_SUCCESS; + unreachable("Invalid blit2d source type"); } struct anv_shader_module fs = { .nir = NULL }; -- cgit v1.2.3 From 15a9468d85a01042743cab93593a57aec8ed3f0a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 09:14:57 -0700 Subject: anv/blit2d: Simplify create_iview Now it just creates the image and view. The caller is responsible for handling the offset calculations. Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 55 +++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 30 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index c3bf4152902..eb0c048b533 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -95,29 +95,20 @@ vk_format_for_size(int bs) static void create_iview(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *surf, - struct anv_meta_blit2d_rect *rect, + uint64_t offset, VkImageUsageFlags usage, + uint32_t width, + uint32_t height, VkImage *img, struct anv_image_view *iview) { - struct isl_tile_info tile_info; - isl_tiling_get_info(&cmd_buffer->device->isl_dev, - surf->tiling, surf->bs, &tile_info); - const unsigned tile_width_px = tile_info.width > surf->bs ? - tile_info.width / surf->bs : 1; - uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? - &rect->src_y : &rect->dst_y; - uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? - &rect->src_x : &rect->dst_x; - - /* Define the shared state among all created image views */ const VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .format = vk_format_for_size(surf->bs), .extent = { - .width = rect->width + (*rect_x) % tile_width_px, - .height = rect->height + (*rect_y) % tile_info.height, + .width = width, + .height = height, .depth = 1, }, .mipLevels = 1, @@ -142,18 +133,6 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, anv_image_from_handle(*img)->bo = surf->bo; anv_image_from_handle(*img)->offset = surf->base_offset; - /* Create a VkImageView that starts at the tile aligned offset closest - * to the provided x/y offset into the surface. 
- */ - struct isl_surf *isl_surf = &anv_image_from_handle(*img)->color_surface.isl; - - uint32_t img_o = 0; - isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, - isl_surf->tiling, surf->bs, - isl_surf->row_pitch, - *rect_x * surf->bs, *rect_y, - &img_o, rect_x, rect_y); - anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -167,7 +146,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, .baseArrayLayer = 0, .layerCount = 1 }, - }, cmd_buffer, img_o, usage); + }, cmd_buffer, offset, usage); } struct blit2d_src_temps { @@ -192,7 +171,14 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); if (src_type == BLIT2D_SRC_TYPE_NORMAL) { - create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT, + uint32_t offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + src->tiling, src->bs, src->pitch, + rect->src_x, rect->src_y, + &offset, &rect->src_x, &rect->src_y); + + create_iview(cmd_buffer, src, offset, VK_IMAGE_USAGE_SAMPLED_BIT, + rect->src_x + rect->width, rect->src_y + rect->height, &tmp->image, &tmp->iview); anv_CreateDescriptorPool(vk_device, @@ -369,15 +355,24 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, { struct anv_device *device = cmd_buffer->device; VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; for (unsigned r = 0; r < num_rects; ++r) { struct blit2d_src_temps src_temps; blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + uint32_t offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + dst->tiling, dst->bs, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + VkImage dst_img; struct anv_image_view dst_iview; - create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); + create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + &dst_img, &dst_iview); struct blit_vb_data { float pos[2]; -- cgit v1.2.3 From f9a2570a06949810b10395d81c19f6295d76c530 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 09:27:42 -0700 Subject: anv/blit2d: Add a bind_dst helper function Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 88 ++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 33 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index eb0c048b533..49646dd0374 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -316,6 +316,56 @@ blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, } } +struct blit2d_dst_temps { + VkImage image; + struct anv_image_view iview; + VkFramebuffer fb; +}; + +static void +blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *dst, + uint64_t offset, + uint32_t width, + uint32_t height, + struct blit2d_dst_temps *tmp) +{ + create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + width, height, &tmp->image, &tmp->iview); + + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&tmp->iview), + }, + .width = 
width, + .height = height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &tmp->fb); + + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = width, + .height = height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); +} + +static void +blit2d_unbind_dst(struct anv_cmd_buffer *cmd_buffer, + struct blit2d_dst_temps *tmp) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + anv_DestroyFramebuffer(vk_device, tmp->fb, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, tmp->image, &cmd_buffer->pool->alloc); +} + void anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) @@ -354,7 +404,6 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_rect *rects) { struct anv_device *device = cmd_buffer->device; - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); for (unsigned r = 0; r < num_rects; ++r) { struct blit2d_src_temps src_temps; @@ -367,12 +416,9 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, &offset, &rects[r].dst_x, &rects[r].dst_y); - VkImage dst_img; - struct anv_image_view dst_iview; - create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - rects[r].dst_x + rects[r].width, - rects[r].dst_y + rects[r].height, - &dst_img, &dst_iview); + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, &dst_temps); struct blit_vb_data { float pos[2]; @@ -441,24 +487,11 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, sizeof(struct anv_vue_header), }); - VkFramebuffer fb; - anv_CreateFramebuffer(vk_device, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&dst_iview), - }, - .width = dst_iview.extent.width, - .height = dst_iview.extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = device->meta_state.blit2d.render_pass, - .framebuffer = fb, + .framebuffer = dst_temps.fb, .renderArea = { .offset = { rects[r].dst_x, rects[r].dst_y, }, .extent = { rects[r].width, rects[r].height }, @@ -469,16 +502,6 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL); - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dst_iview.extent.width, - .height = dst_iview.extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); @@ -487,8 +510,7 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, * descriptor sets, etc. has been used. We are free to delete it. 
*/ blit2d_unbind_src(cmd_buffer, src_type, &src_temps); - anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); + blit2d_unbind_dst(cmd_buffer, &dst_temps); } } -- cgit v1.2.3 From b377c1d08ed16ca0abeabc86e8d651036eea3145 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 09:32:05 -0700 Subject: anv/image: Remove the offset parameter from image_view_init The only place we were using this was in meta_blit2d which always creates a new image anyway so we can just use the image offset. Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_image.c | 5 ++--- src/intel/vulkan/anv_meta_blit.c | 4 ++-- src/intel/vulkan/anv_meta_blit2d.c | 4 ++-- src/intel/vulkan/anv_meta_clear.c | 2 +- src/intel/vulkan/anv_meta_resolve.c | 4 ++-- src/intel/vulkan/anv_private.h | 1 - 6 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 759c8612005..db109625316 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -464,7 +464,6 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset, VkImageUsageFlags usage_mask) { ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); @@ -495,7 +494,7 @@ anv_image_view_init(struct anv_image_view *iview, iview->image = image; iview->bo = image->bo; - iview->offset = image->offset + surface->offset + offset; + iview->offset = image->offset + surface->offset; iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; iview->vk_format = pCreateInfo->format; @@ -614,7 +613,7 @@ anv_CreateImageView(VkDevice _device, if (view == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_image_view_init(view, device, pCreateInfo, NULL, 0, ~0); + anv_image_view_init(view, device, pCreateInfo, NULL, ~0); *pView = anv_image_view_to_handle(view); diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 218499a8787..b726b94c3cd 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -384,7 +384,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); + cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); const VkOffset3D dest_offset = { .x = pRegions[r].dstOffsets[0].x, @@ -434,7 +434,7 @@ void anv_CmdBlitImage( .layerCount = 1 }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); meta_emit_blit(cmd_buffer, src_image, &src_iview, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 49646dd0374..68ade55d8ea 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -131,7 +131,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, * creating a dummy memory object etc. so there's really no point. 
*/ anv_image_from_handle(*img)->bo = surf->bo; - anv_image_from_handle(*img)->offset = surf->base_offset; + anv_image_from_handle(*img)->offset = surf->base_offset + offset; anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -146,7 +146,7 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, .baseArrayLayer = 0, .layerCount = 1 }, - }, cmd_buffer, offset, usage); + }, cmd_buffer, usage); } struct blit2d_src_temps { diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index a24e59950be..a03701c684b 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -813,7 +813,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, .layerCount = 1 }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 3e7c7d39ba5..8c1bdc06f84 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -758,7 +758,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT); + cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT); struct anv_image_view dest_iview; anv_image_view_init(&dest_iview, cmd_buffer->device, @@ -775,7 +775,7 @@ void anv_CmdResolveImage( .layerCount = 1, }, }, - cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); VkFramebuffer fb_h; anv_CreateFramebuffer(device_h, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index a394fe8a683..99d3934ddda 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1667,7 +1667,6 @@ void anv_image_view_init(struct anv_image_view *view, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer, - uint32_t offset, VkImageUsageFlags usage_mask); struct anv_buffer_view { -- cgit v1.2.3 From 2e827816fa10f6b5c9c13c5833e3af5db2621efa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 31 Mar 2016 15:27:30 -0700 Subject: anv/blit2d: Add another passthrough varying to the VS We need the VS to provide some setup data for other stages. 
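A note on how this setup data actually reaches the later stages: it is not real per-vertex data. In the blit2d vertex layouts (see the W-tiled patch that follows), such constants live in a binding with stride 0 and VK_VERTEX_INPUT_RATE_INSTANCE, so all three vertices of the rect fetch the same record, and the flat interpolation requested here hands the value to the fragment shader unmodified. A minimal sketch of that pairing, with illustrative names rather than the driver's exact declarations:

    /* Binding 0 never advances: stride 0 plus a per-instance input rate
     * makes every vertex of the rect read the same bytes, so the
     * attribute behaves as a per-draw constant.  Flat interpolation then
     * forwards it to the fragment shader without any blending.
     */
    static const VkVertexInputBindingDescription constant_binding = {
       .binding = 0,
       .stride = 0,                                /* re-read, never stepped */
       .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE,
    };

    static const VkVertexInputAttributeDescription constant_attr = {
       .location = 2,                  /* mirrors the "Texture Offset" attribute below */
       .binding = 0,
       .format = VK_FORMAT_R32G32B32_UINT,
       .offset = 16,                   /* just past the 16-byte VUE header */
    };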
Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_meta_blit2d.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 68ade55d8ea..b6e33c84fdd 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -568,6 +568,15 @@ build_nir_vertex_shader(void) tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; nir_copy_var(&b, tex_pos_out, tex_pos_in); + nir_variable *other_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_other"); + other_in->data.location = VERT_ATTRIB_GENERIC2; + nir_variable *other_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_other"); + other_out->data.location = VARYING_SLOT_VAR1; + other_out->data.interpolation = INTERP_QUALIFIER_FLAT; + nir_copy_var(&b, other_out, other_in); + return b.shader; } -- cgit v1.2.3 From e3312644cbc8a74c262e35672547d5cce83fd1bc Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 20:31:25 -0700 Subject: anv/blit2d: Add support for W-tiled destinations Reviewed-by: Nanley Chery Reviewed-by: Chad Versace --- src/intel/vulkan/anv_meta_blit2d.c | 366 ++++++++++++++++++++++++++++++++----- 1 file changed, 322 insertions(+), 44 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index b6e33c84fdd..6d6127a8693 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -514,6 +514,149 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, } } +static void +anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + struct anv_device *device = cmd_buffer->device; + + for (unsigned r = 0; r < num_rects; ++r) { + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + + assert(dst->bs == 1); + uint32_t offset; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + ISL_TILING_W, 1, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + + /* The original coordinates were in terms of an actual W-tiled offset + * but we are binding this image as Y-tiled. We need to adjust our + * rectangle accordingly. 
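Before the hunk continues, it may help to see the address math in plain C. The first function mirrors the nir_copy_bits chain from build_nir_w_tiled_fetch earlier in this series; the second is the W-to-Y rectangle adjustment performed just below, with a worked example. Both are illustrative sketches (x, y, and pitch are in bytes, since bs == 1; bit-6 address swizzling is omitted), not driver code:

    /* Intratile W swizzle for a 64x64-byte (4KB) W tile, mirroring the
     * nir_copy_bits chain in build_nir_w_tiled_fetch:
     * bit0=x0 bit1=y0 bit2=x1 bit3=y1 bit4=x2 bits5-8=y2..y5 bits9-11=x3..x5
     */
    static uint32_t
    w_tile_offset(uint32_t x, uint32_t y, uint32_t pitch)
    {
       uint32_t offset = (y >> 6) * (pitch * 64) + (x >> 6) * 4096;
       offset |= (x & 1) << 0;
       offset |= (y & 1) << 1;
       offset |= ((x >> 1) & 1) << 2;
       offset |= ((y >> 1) & 1) << 3;
       offset |= ((x >> 2) & 1) << 4;
       offset |= ((y >> 2) & 0xf) << 5;
       offset |= ((x >> 3) & 0x7) << 9;
       return offset;
    }

    /* W -> Y rectangle mapping: an aligned 8x4 block of W texels occupies
     * the same 32 bytes as a 16x2 block of Y texels, so X rounds out to a
     * multiple of 8 and doubles while Y rounds out to a multiple of 4 and
     * halves.  This is also why the Y-tiled view below uses a pitch of
     * dst->pitch * 2.  Worked example: a 100x30 rect at (10, 5) maps to
     * [16, 224) x [2, 18): x0 = (10/8)*16 = 16, x1 = ceil(110/8)*16 = 224,
     * y0 = (5/4)*2 = 2, y1 = ceil(35/4)*2 = 18.
     */
    static void
    w_rect_to_y_rect(uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
                     uint32_t bounds_Y[4])
    {
       bounds_Y[0] = (x0 / 8) * 16;
       bounds_Y[1] = (y0 / 4) * 2;
       bounds_Y[2] = ((x1 + 7) / 8) * 16;   /* DIV_ROUND_UP(x1, 8) * 16 */
       bounds_Y[3] = ((y1 + 3) / 4) * 2;    /* DIV_ROUND_UP(y1, 4) * 2 */
    }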
+ */ + uint32_t xmin_Y, xmax_Y, ymin_Y, ymax_Y; + xmin_Y = (rects[r].dst_x / 8) * 16; + xmax_Y = DIV_ROUND_UP(rects[r].dst_x + rects[r].width, 8) * 16; + ymin_Y = (rects[r].dst_y / 4) * 2; + ymax_Y = DIV_ROUND_UP(rects[r].dst_y + rects[r].height, 4) * 2; + + struct anv_meta_blit2d_surf dst_Y = { + .bo = dst->bo, + .tiling = ISL_TILING_Y0, + .base_offset = dst->base_offset, + .bs = 1, + .pitch = dst->pitch * 2, + }; + + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, &dst_temps); + + struct blit_vb_header { + struct anv_vue_header vue; + int32_t tex_offset[2]; + uint32_t tex_pitch; + uint32_t bounds[4]; + } *vb_header; + + struct blit_vb_data { + float pos[2]; + } *vb_data; + + unsigned vb_size = sizeof(*vb_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + vb_header = vb_state.map; + + *vb_header = (struct blit_vb_header) { + .tex_offset = { + rects[r].src_x - rects[r].dst_x, + rects[r].src_y - rects[r].dst_y, + }, + .tex_pitch = src->pitch, + .bounds = { + rects[r].dst_x, + rects[r].dst_y, + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + }, + }; + + vb_data = (void *)(vb_header + 1); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + xmax_Y, + ymax_Y, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + xmin_Y, + ymax_Y, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + xmin_Y, + ymin_Y, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + (void *)vb_data - vb_state.map, + }); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = dst_temps.fb, + .renderArea = { + .offset = { xmin_Y, ymin_Y, }, + .extent = { xmax_Y - xmin_Y, ymax_Y - ymin_Y }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_W_TILE); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); + blit2d_unbind_dst(cmd_buffer, &dst_temps); + } +} + void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, @@ -529,8 +672,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, } if (dst->tiling == ISL_TILING_W) { - assert(dst->bs == 1); - anv_finishme("Blitting to w-tiled destinations not yet supported"); + anv_meta_blit2d_w_tiled_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); return; } else if (dst->bs % 3 == 0) { anv_finishme("Blitting to RGB destinations not yet supported"); @@ -688,6 +831,47 @@ build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, return &tex->dest.ssa; } +static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + }, + }, +}; + static nir_shader * build_nir_copy_fragment_shader(struct anv_device *device, texel_fetch_build_func txf_func) @@ -718,6 +902,136 @@ build_nir_copy_fragment_shader(struct anv_device *device, return b.shader; } +static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE + }, + { + .binding = 1, + .stride = 2 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 4, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Offset */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32_UINT, + .offset = 16 + }, + { + /* Destination bounds */ + .location = 3, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 28 + }, + }, +}; + +static nir_shader * +build_nir_w_tiled_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *ivec3 = glsl_vector_type(GLSL_TYPE_INT, 3); + const struct glsl_type *uvec4 = glsl_vector_type(GLSL_TYPE_UINT, 4); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + /* We need gl_FragCoord so we know our Y-tiled position */ + nir_variable *frag_coord_in = nir_variable_create(b.shader, + nir_var_shader_in, + vec4, "gl_FragCoord"); + frag_coord_in->data.location = 
VARYING_SLOT_POS; + frag_coord_in->data.origin_upper_left = true; + + /* In location 0 we have an ivec3 that has the offset from dest to + * source in the first two components and the stride in the third. + */ + nir_variable *tex_off_in = nir_variable_create(b.shader, nir_var_shader_in, + ivec3, "v_tex_off"); + tex_off_in->data.location = VARYING_SLOT_VAR0; + tex_off_in->data.interpolation = INTERP_QUALIFIER_FLAT; + + /* In location 1 we have a uvec4 that gives us the bounds of the + * destination. We need to discard if we get outside this boundary. + */ + nir_variable *bounds_in = nir_variable_create(b.shader, nir_var_shader_in, + uvec4, "v_bounds"); + bounds_in->data.location = VARYING_SLOT_VAR1; + bounds_in->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + + nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in)); + nir_ssa_def *x_Y = nir_channel(&b, frag_coord_int, 0); + nir_ssa_def *y_Y = nir_channel(&b, frag_coord_int, 1); + + /* Compute the W-tiled position from the Y-tiled position */ + nir_ssa_def *x_W = nir_iand(&b, x_Y, nir_imm_int(&b, 0xffffff80)); + x_W = nir_ushr(&b, x_W, nir_imm_int(&b, 1)); + x_W = nir_copy_bits(&b, x_W, 0, x_Y, 0, 1); + x_W = nir_copy_bits(&b, x_W, 1, x_Y, 2, 1); + x_W = nir_copy_bits(&b, x_W, 2, y_Y, 0, 1); + x_W = nir_copy_bits(&b, x_W, 3, x_Y, 4, 3); + + nir_ssa_def *y_W = nir_iand(&b, y_Y, nir_imm_int(&b, 0xffffffe0)); + y_W = nir_ishl(&b, y_W, nir_imm_int(&b, 1)); + y_W = nir_copy_bits(&b, y_W, 0, x_Y, 1, 1); + y_W = nir_copy_bits(&b, y_W, 1, x_Y, 3, 1); + y_W = nir_copy_bits(&b, y_W, 2, y_Y, 1, 4); + + /* Figure out if we are out-of-bounds and discard */ + nir_ssa_def *bounds = nir_load_var(&b, bounds_in); + nir_ssa_def *oob = + nir_ior(&b, nir_ult(&b, x_W, nir_channel(&b, bounds, 0)), + nir_ior(&b, nir_ult(&b, y_W, nir_channel(&b, bounds, 1)), + nir_ior(&b, nir_uge(&b, x_W, nir_channel(&b, bounds, 2)), + nir_uge(&b, y_W, nir_channel(&b, bounds, 3))))); + + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if); + discard->src[0] = nir_src_for_ssa(oob); + nir_builder_instr_insert(&b, &discard->instr); + + unsigned swiz[4] = { 0, 1, 0, 0 }; + nir_ssa_def *tex_off = + nir_swizzle(&b, nir_load_var(&b, tex_off_in), swiz, 2, false); + nir_ssa_def *tex_pos = nir_iadd(&b, nir_vec2(&b, x_W, y_W), tex_off); + nir_ssa_def *tex_pitch = nir_channel(&b, nir_load_var(&b, tex_off_in), 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); + nir_store_var(&b, color_out, color, 0xf); + + return b.shader; +} + void anv_device_finish_meta_blit2d_state(struct anv_device *device) { @@ -781,12 +1095,17 @@ blit2d_init_pipeline(struct anv_device *device, unreachable("Invalid blit2d source type"); } + const VkPipelineVertexInputStateCreateInfo *vi_create_info; struct anv_shader_module fs = { .nir = NULL }; switch (dst_type) { case BLIT2D_DST_TYPE_NORMAL: fs.nir = build_nir_copy_fragment_shader(device, src_func); + vi_create_info = &normal_vi_create_info; break; case BLIT2D_DST_TYPE_W_TILE: + fs.nir = build_nir_w_tiled_fragment_shader(device, src_func); + vi_create_info = &w_tiled_vi_create_info; + break; case BLIT2D_DST_TYPE_RGB: /* Not yet supported */ default: @@ -802,47 +1121,6 @@ blit2d_init_pipeline(struct anv_device *device, .nir = build_nir_vertex_shader(), }; - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE - }, - { - .binding = 1, - .stride = 5 * sizeof(float), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offset = 8 - } - } - }; - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -863,7 +1141,7 @@ blit2d_init_pipeline(struct anv_device *device, .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = ARRAY_SIZE(pipeline_shader_stages), .pStages = pipeline_shader_stages, - .pVertexInputState = &vi_create_info, + .pVertexInputState = vi_create_info, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, -- cgit v1.2.3 From c226e72a399199a71579f22e5b088f50b1a10ac1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 17:43:44 -0700 Subject: anv/formats: Advertise blit support for stencil Thanks to advances in the blit code, we can do this now. Reviewed-by: Nanley Chery --- src/intel/vulkan/anv_formats.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 4d279a8fb72..750af793bdf 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -381,13 +381,11 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; if (anv_format_is_depth_or_stencil(&anv_formats[format])) { tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - if (physical_device->info->gen >= 8) { + if (physical_device->info->gen >= 8) tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; - } - if (anv_formats[format].has_depth) { - tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; - } + + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; } else { enum isl_format linear_fmt, tiled_fmt; struct anv_format_swizzle linear_swizzle, tiled_swizzle; -- cgit v1.2.3 From d4a28ae52abddd37c2adc6bb1f4e4b2de76a16fa Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 8 Apr 2016 17:07:49 -0700 Subject: anv/meta: Make clflushes conditional on !devinfo->has_llc --- src/intel/vulkan/anv_meta_blit.c | 3 ++- src/intel/vulkan/anv_meta_blit2d.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index b726b94c3cd..24e47142a5c 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -186,7 +186,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, }, }; - anv_state_clflush(vb_state); + if (!device->info.has_llc) + anv_state_clflush(vb_state); struct anv_buffer vertex_buffer = { .device = 
device, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 6d6127a8693..a7405e00810 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -468,7 +468,8 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, }, }; - anv_state_clflush(vb_state); + if (!device->info.has_llc) + anv_state_clflush(vb_state); struct anv_buffer vertex_buffer = { .device = device, @@ -611,7 +612,8 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, }, }; - anv_state_clflush(vb_state); + if (!device->info.has_llc) + anv_state_clflush(vb_state); struct anv_buffer vertex_buffer = { .device = device, -- cgit v1.2.3 From bff7a8c4f343a67149e6a6854e0597696b3d4b03 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 9 Apr 2016 17:06:59 -0700 Subject: anv/pipeline: Set up flat enables correctly --- src/intel/vulkan/genX_pipeline_util.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index cd138dfae61..654d2e0d43f 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -239,6 +239,7 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) .AttributeSwizzleEnable = true, .PointSpriteTextureCoordinateOrigin = UPPERLEFT, .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, + .ConstantInterpolationEnable = wm_prog_data->flat_inputs, #if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, -- cgit v1.2.3 From 76b0ba087c50a271867f98eaf2acf0364d5b706e Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 6 Apr 2016 15:57:32 -0700 Subject: anv/clear: Disable the scissor operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the scissor rectangle always matches that of the framebuffer, this operation isn't needed. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_clear.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index a03701c684b..50085894b9c 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -213,6 +213,7 @@ create_pipeline(struct anv_device *device, .color_attachment_count = MAX_RTS, .use_repclear = use_repclear, .disable_viewport = true, + .disable_scissor = true, .disable_vs = true, .use_rectlist = true }, -- cgit v1.2.3 From 9fae6ee02659463259e9d7d90a2edf5261887503 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 29 Mar 2016 13:31:30 -0700 Subject: anv/meta: Don't set the dynamic state for disabled operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CmdSet* functions dirty the CommandBuffer's dynamic state. This causes the new state to be emitted when CmdDraw is called. Since we don't need the state that would be emitted, don't call the CmdSet* functions. 
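The mechanism being sidestepped can be sketched with generic names (these are not the driver's exact fields): every vkCmdSet* records the value and sets a dirty bit, and the dirty bits are flushed into the batch at the next draw, so the only way to avoid the redundant emission is to never dirty the bit in the first place:

    #include <stdint.h>
    #include <vulkan/vulkan.h>

    enum { DIRTY_VIEWPORT = 1 << 0 };

    struct cmd_state {
       uint32_t dirty;              /* flushed into the batch at draw time */
       VkViewport viewport;
    };

    static void
    emit_viewport_state(struct cmd_state *s)
    {
       (void)s;                     /* would write 3DSTATE_VIEWPORT_* here */
    }

    static void
    cmd_set_viewport(struct cmd_state *s, const VkViewport *vp)
    {
       s->viewport = *vp;           /* record only... */
       s->dirty |= DIRTY_VIEWPORT;  /* ...and mark for lazy emission */
    }

    static void
    cmd_draw(struct cmd_state *s)
    {
       if (s->dirty & DIRTY_VIEWPORT)   /* re-emitted only if dirtied */
          emit_viewport_state(s);
       s->dirty = 0;
    }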
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 13 +------------ src/intel/vulkan/anv_meta_blit2d.c | 14 +------------- src/intel/vulkan/anv_meta_clear.c | 29 ----------------------------- src/intel/vulkan/anv_meta_resolve.c | 29 +---------------------------- 4 files changed, 3 insertions(+), 82 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 24e47142a5c..72eb0d1b15a 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -115,8 +115,7 @@ static void meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *saved_state) { - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + anv_meta_save(saved_state, cmd_buffer, 0); } static void @@ -306,16 +305,6 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout, 0, 1, diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index a7405e00810..1bde38785db 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -344,17 +344,6 @@ blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer, .height = height, .layers = 1 }, &cmd_buffer->pool->alloc, &tmp->fb); - - - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = width, - .height = height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); } static void @@ -377,8 +366,7 @@ void anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_saved_state *save) { - anv_meta_save(save, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + anv_meta_save(save, cmd_buffer, 0); } static void diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 50085894b9c..7512afeb584 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -44,7 +44,6 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, { anv_meta_save(saved_state, cmd_buffer, (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR) | (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) | (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)); @@ -397,26 +396,6 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, .offset = state.offset, }; - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, (VkDeviceSize[]) { 0 }); @@ -596,14 +575,6 @@ emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, }, }); - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, 
clear_value.stencil); diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 8c1bdc06f84..87ebcaad215 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -41,9 +41,7 @@ static void meta_resolve_save(struct anv_meta_saved_state *saved_state, struct anv_cmd_buffer *cmd_buffer) { - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR)); + anv_meta_save(saved_state, cmd_buffer, 0); cmd_buffer->state.dynamic.viewport.count = 0; cmd_buffer->state.dynamic.scissor.count = 0; @@ -481,7 +479,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, struct anv_device *device = cmd_buffer->device; VkDevice device_h = anv_device_to_handle(device); VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; const struct anv_image *src_image = src_iview->image; const struct vertex_attrs vertex_data[3] = { @@ -609,30 +606,6 @@ emit_resolve(struct anv_cmd_buffer *cmd_buffer, /*copyCount*/ 0, /*copies */ NULL); - ANV_CALL(CmdSetViewport)(cmd_buffer_h, - /*firstViewport*/ 0, - /*viewportCount*/ 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, - /*firstScissor*/ 0, - /*scissorCount*/ 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = (VkExtent2D) { fb->width, fb->height }, - }, - }); - VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); -- cgit v1.2.3 From 88d1c19c9dfd6be1a374917f707e3c77089d7013 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 7 Apr 2016 02:47:28 -0700 Subject: anv_cmd_buffer: Don't make the initial state dirty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid excessive state emission. Relevant state for an action command will get set by the user: From Chapter 5. Command Buffers, When a command buffer begins recording, all state in that command buffer is undefined. [...] Whenever the state of a command buffer is undefined, the application must set all relevant state on the command buffer before any state dependent commands such as draws and dispatches are recorded, otherwise the behavior of executing that command buffer is undefined. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_cmd_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index ac8bf5fc619..5693fab7678 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -123,7 +123,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) /* 0 isn't a valid config. This ensures that we always configure L3$. 
*/ cmd_buffer->state.current_l3_config = 0; - state->dirty = ~0; + state->dirty = 0; state->vb_dirty = 0; state->descriptors_dirty = 0; state->push_constants_dirty = 0; -- cgit v1.2.3 From 992bbed98d0eb226c2ad45eafb3cb2ad68f3fed7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 31 Mar 2016 10:04:46 -0700 Subject: gen{7,8}_pipeline: Apply 3DPRIM_RECTLIST restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to 3D Primitives Overview in the Bspec, when the RECTLIST primitive is in use, the CLIP stage should be disabled or set to have a different Clip Mode, and Viewport Mapping must be disabled: Clipping: Must not require clipping or rely on the CLIP unit’s ClipTest logic to determine if clipping is required. Either the CLIP unit should be DISABLED, or the CLIP unit’s Clip Mode should be set to a value other than CLIPMODE_NORMAL. Viewport Mapping must be DISABLED (as is typical with the use of screen-space coordinates). We swap out ::disable_viewport for ::use_rectlist, because we currently always use the RECTLIST primitive when we disable viewport mapping, and we'll likely continue to use this primitive. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/gen7_pipeline.c | 4 ++-- src/intel/vulkan/gen8_pipeline.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 37e4639b287..10397343e5f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -47,7 +47,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .StatisticsEnable = true, .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ViewTransformEnable = !(extra && extra->disable_viewport), + .ViewTransformEnable = !(extra && extra->use_rectlist), .FrontWinding = vk_to_gen_front_face[info->frontFace], /* bool AntiAliasingEnable; */ @@ -225,7 +225,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], .CullMode = vk_to_gen_cullmode[rs_info->cullMode], - .ClipEnable = true, + .ClipEnable = !(extra && extra->use_rectlist), .APIMode = APIMODE_OGL, .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), .ClipMode = CLIPMODE_NORMAL, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index b8b29d46b8a..0d71e0719da 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -56,7 +56,7 @@ emit_rs_state(struct anv_pipeline *pipeline, struct GENX(3DSTATE_SF) sf = { GENX(3DSTATE_SF_header), - .ViewportTransformEnable = !(extra && extra->disable_viewport), + .ViewportTransformEnable = !(extra && extra->use_rectlist), .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, .TriangleFanProvokingVertexSelect = 1, @@ -348,7 +348,7 @@ genX(graphics_pipeline_create)( const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), - .ClipEnable = true, + .ClipEnable = !(extra && extra->use_rectlist), .EarlyCullEnable = true, .APIMode = 1, /* D3D */ .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), -- cgit v1.2.3 From cff0f6b027f139cc33c9ecbfd22f9662d75c0cb7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Thu, 31 Mar 2016 23:16:12 -0700 Subject: gen{7,8}_pipeline: Always 
set ViewportXYClipTestEnable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the following reasons, there is no behavioural change with this commit: the ViewportXYClipTest function of the CLIP stage will continue to be enabled outside of Meta (where disable_viewport is always false), and the CLIP stage is turned off within Meta, so this function will continue to be disabled in that case. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/gen7_pipeline.c | 2 +- src/intel/vulkan/gen8_pipeline.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 10397343e5f..5c04fb749cc 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -227,7 +227,7 @@ genX(graphics_pipeline_create)( .CullMode = vk_to_gen_cullmode[rs_info->cullMode], .ClipEnable = !(extra && extra->use_rectlist), .APIMode = APIMODE_OGL, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ViewportXYClipTestEnable = true, .ClipMode = CLIPMODE_NORMAL, .TriangleStripListProvokingVertexSelect = 0, .LineStripListProvokingVertexSelect = 0, diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 0d71e0719da..7f26ef5e197 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -351,7 +351,7 @@ genX(graphics_pipeline_create)( .ClipEnable = !(extra && extra->use_rectlist), .EarlyCullEnable = true, .APIMode = 1, /* D3D */ - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ViewportXYClipTestEnable = true, .ClipMode = pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? -- cgit v1.2.3 From 9f72466e9f03e72cc805775e8f6104c212150ba7 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Mon, 11 Apr 2016 09:57:21 -0700 Subject: anv: Delete anv_graphics_pipeline_create_info::disable_viewport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users of this field. 
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 1 - src/intel/vulkan/anv_meta_blit2d.c | 1 - src/intel/vulkan/anv_meta_clear.c | 1 - src/intel/vulkan/anv_meta_resolve.c | 1 - src/intel/vulkan/anv_private.h | 1 - 5 files changed, 5 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 72eb0d1b15a..6c3668bdbf7 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -669,7 +669,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 1bde38785db..5c1e30c12ac 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -1190,7 +1190,6 @@ blit2d_init_pipeline(struct anv_device *device, const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 7512afeb584..6dd3e0be759 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -211,7 +211,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = MAX_RTS, .use_repclear = use_repclear, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 87ebcaad215..9efe6f7d986 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -343,7 +343,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, .disable_scissor = true, .disable_vs = true, .use_rectlist = true diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 99d3934ddda..d62e5baeaa9 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1488,7 +1488,6 @@ struct anv_graphics_pipeline_create_info { int8_t color_attachment_count; bool use_repclear; - bool disable_viewport; bool disable_scissor; bool disable_vs; bool use_rectlist; -- cgit v1.2.3 From 1949e502bc74f0d65127ceef024b2c8af848f94c Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 13 Apr 2016 11:59:54 -0700 Subject: anv: Replace ::disable_scissor with ::use_rectlists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Meta currently uses screenspace RECTLIST primitives that lie within the framebuffer rectangle. Since this behavior shouldn't change in the future, disable the scissor operation whenever rectlists are used. 
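The premise is visible in the vertex setup earlier in the series: meta records only three screen-space corners of the rect, the hardware infers the fourth, and the corners are computed from the destination rectangle, which lies inside the framebuffer, so a scissor test could never reject anything. A sketch of that geometry, mirroring the vb_data writes in anv_meta_blit2d.c:

    /* A RECTLIST needs only three corners; the hardware derives the
     * fourth.  Meta records (xmax, ymax), (xmin, ymax), (xmin, ymin),
     * all inside the framebuffer, which is why scissoring is redundant
     * and ScissorRectangleEnable can simply follow !use_rectlist.
     */
    struct rect_vert { float pos[2]; };

    static void
    fill_rectlist(struct rect_vert v[3],
                  float xmin, float ymin, float xmax, float ymax)
    {
       v[0] = (struct rect_vert) { .pos = { xmax, ymax } };
       v[1] = (struct rect_vert) { .pos = { xmin, ymax } };
       v[2] = (struct rect_vert) { .pos = { xmin, ymin } };
    }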
Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_blit.c | 1 - src/intel/vulkan/anv_meta_blit2d.c | 1 - src/intel/vulkan/anv_meta_clear.c | 1 - src/intel/vulkan/anv_meta_resolve.c | 1 - src/intel/vulkan/anv_private.h | 1 - src/intel/vulkan/gen7_pipeline.c | 2 +- src/intel/vulkan/gen8_pipeline.c | 2 +- 7 files changed, 2 insertions(+), 7 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index 6c3668bdbf7..3c54ef4bafb 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -669,7 +669,6 @@ anv_device_init_meta_blit_state(struct anv_device *device) const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }; diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 5c1e30c12ac..577eeaea104 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -1190,7 +1190,6 @@ blit2d_init_pipeline(struct anv_device *device, const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }; diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 6dd3e0be759..c5c7d563e91 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -211,7 +211,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = MAX_RTS, .use_repclear = use_repclear, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }, diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index 9efe6f7d986..f83bb6bbfd3 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -343,7 +343,6 @@ create_pipeline(struct anv_device *device, &(struct anv_graphics_pipeline_create_info) { .color_attachment_count = -1, .use_repclear = false, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d62e5baeaa9..ae2e08d2dfb 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1488,7 +1488,6 @@ struct anv_graphics_pipeline_create_info { int8_t color_attachment_count; bool use_repclear; - bool disable_scissor; bool disable_vs; bool use_rectlist; }; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5c04fb749cc..d6d5ce6778f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -54,7 +54,7 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, .CullMode = vk_to_gen_cullmode[info->cullMode], /* uint32_t LineEndCapAntialiasingRegionWidth; */ - .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ScissorRectangleEnable = !(extra && extra->use_rectlist), /* uint32_t MultisampleRasterizationMode; */ /* bool LastPixelEnable; */ diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 7f26ef5e197..6f6868ea5ea 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -82,7 +82,7 @@ emit_rs_state(struct anv_pipeline *pipeline, .CullMode = vk_to_gen_cullmode[info->cullMode], .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], 
.BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ScissorRectangleEnable = !(extra && extra->use_rectlist), #if GEN_GEN == 8 .ViewportZClipTestEnable = true, #else -- cgit v1.2.3 From 79fbec30fc16399ede9385ef52cb62cefbb388f4 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Wed, 30 Mar 2016 17:13:01 -0700 Subject: anv: Remove default scissor and viewport concepts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users should never provide a scissor or viewport count of 0 because they are required to set such state in a graphics pipeline. This behavior was previously only used in Meta, which actually just disables those hardware operations at pipeline creation time. Kristian noticed that the current assignment of viewport count reduces the number of viewport uploads, so it is not removed. Signed-off-by: Nanley Chery Reviewed-by: Kristian Høgsberg Kristensen --- src/intel/vulkan/anv_meta_clear.c | 2 +- src/intel/vulkan/anv_meta_resolve.c | 3 --- src/intel/vulkan/gen7_cmd_buffer.c | 26 ++++---------------------- src/intel/vulkan/gen8_cmd_buffer.c | 27 ++++----------------------- 4 files changed, 9 insertions(+), 49 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index c5c7d563e91..eb4e56984c3 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -47,8 +47,8 @@ meta_clear_begin(struct anv_meta_saved_state *saved_state, (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE) | (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)); + /* Avoid uploading more viewport states than necessary */ cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; } static void diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c index f83bb6bbfd3..7d2a75bb752 100644 --- a/src/intel/vulkan/anv_meta_resolve.c +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -42,9 +42,6 @@ meta_resolve_save(struct anv_meta_saved_state *saved_state, struct anv_cmd_buffer *cmd_buffer) { anv_meta_save(saved_state, cmd_buffer, 0); - - cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; } static void diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index b5d21efb203..5130a40d277 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -133,10 +133,11 @@ clamp_int64(int64_t x, int64_t min, int64_t max) } #if GEN_GEN == 7 && !GEN_IS_HASWELL -static void -emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkRect2D *scissors) +void +gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) { + uint32_t count = cmd_buffer->state.dynamic.scissor.count; + const VkRect2D *scissors = cmd_buffer->state.dynamic.scissor.scissors; struct anv_state scissor_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); @@ -178,25 +179,6 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, if (!cmd_buffer->device->info.has_llc) anv_state_clflush(scissor_state); } - -void -gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.scissor.count > 0) { - emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, - cmd_buffer->state.dynamic.scissor.scissors); - } else { - /* Emit a default scissor based on the currently bound framebuffer */ - emit_scissor_state(cmd_buffer, 1, - 
&(VkRect2D) { - .offset = { .x = 0, .y = 0, }, - .extent = { - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - }, - }); - } -} #endif static const uint32_t vk_to_gen_index_type[] = { diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 5b6afb3d70d..3956a58d201 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -33,10 +33,11 @@ #include "genxml/genX_pack.h" #if GEN_GEN == 8 -static void -emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkViewport *viewports) +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) { + uint32_t count = cmd_buffer->state.dynamic.viewport.count; + const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; struct anv_state sf_clip_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); struct anv_state cc_state = @@ -86,26 +87,6 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), .SFClipViewportPointer = sf_clip_state.offset); } - -void -gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.viewport.count > 0) { - emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, - cmd_buffer->state.dynamic.viewport.viewports); - } else { - /* If viewport count is 0, this is taken to mean "use the default" */ - emit_viewport_state(cmd_buffer, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - } -} #endif #define emit_lri(batch, reg, imm) \ -- cgit v1.2.3 From c34be07230ef98d5021f0bdc88c3b0bc804ee2dd Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 10:28:45 -0700 Subject: spirv: Move to compiler/ While it does rely on NIR, it's not really part of the NIR core. At the moment, it still builds as part of libnir but that can be changed later if desired. 
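For the driver-side consumer touched by this move (src/intel/vulkan/anv_pipeline.c, a two-line change per the diffstat below), the relocation is source-compatible apart from the include path: spirv_to_nir() keeps the signature declared in the nir_spirv.h being moved. A minimal sketch of a caller after the move; the function name and variables here are hypothetical, not code from this series:

    /* The header moved from src/compiler/nir/spirv/ to src/compiler/spirv/,
     * so callers now include "spirv/nir_spirv.h" rather than
     * "nir/spirv/nir_spirv.h".  Nothing about the entry point itself changed.
     */
    #include "spirv/nir_spirv.h"

    static nir_function *
    sketch_spirv_to_nir_vs(const uint32_t *words, size_t word_count,
                           const nir_shader_compiler_options *options)
    {
       /* No specialization constants in this sketch, hence NULL and 0. */
       return spirv_to_nir(words, word_count,
                           NULL /* specializations */, 0,
                           MESA_SHADER_VERTEX, "main", options);
    }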
--- src/compiler/Makefile.sources | 14 +- src/compiler/nir/spirv/GLSL.std.450.h | 127 -- src/compiler/nir/spirv/nir_spirv.h | 54 - src/compiler/nir/spirv/spirv.h | 870 ---------- src/compiler/nir/spirv/spirv_to_nir.c | 2710 -------------------------------- src/compiler/nir/spirv/vtn_alu.c | 464 ------ src/compiler/nir/spirv/vtn_cfg.c | 778 --------- src/compiler/nir/spirv/vtn_glsl450.c | 666 -------- src/compiler/nir/spirv/vtn_private.h | 484 ------ src/compiler/nir/spirv/vtn_variables.c | 1415 ----------------- src/compiler/spirv/GLSL.std.450.h | 127 ++ src/compiler/spirv/nir_spirv.h | 54 + src/compiler/spirv/spirv.h | 870 ++++++++++ src/compiler/spirv/spirv_to_nir.c | 2710 ++++++++++++++++++++++++++++++++ src/compiler/spirv/vtn_alu.c | 464 ++++++ src/compiler/spirv/vtn_cfg.c | 778 +++++++++ src/compiler/spirv/vtn_glsl450.c | 666 ++++++++ src/compiler/spirv/vtn_private.h | 484 ++++++ src/compiler/spirv/vtn_variables.c | 1415 +++++++++++++++++ src/intel/vulkan/anv_pipeline.c | 2 +- 20 files changed, 7576 insertions(+), 7576 deletions(-) delete mode 100644 src/compiler/nir/spirv/GLSL.std.450.h delete mode 100644 src/compiler/nir/spirv/nir_spirv.h delete mode 100644 src/compiler/nir/spirv/spirv.h delete mode 100644 src/compiler/nir/spirv/spirv_to_nir.c delete mode 100644 src/compiler/nir/spirv/vtn_alu.c delete mode 100644 src/compiler/nir/spirv/vtn_cfg.c delete mode 100644 src/compiler/nir/spirv/vtn_glsl450.c delete mode 100644 src/compiler/nir/spirv/vtn_private.h delete mode 100644 src/compiler/nir/spirv/vtn_variables.c create mode 100644 src/compiler/spirv/GLSL.std.450.h create mode 100644 src/compiler/spirv/nir_spirv.h create mode 100644 src/compiler/spirv/spirv.h create mode 100644 src/compiler/spirv/spirv_to_nir.c create mode 100644 src/compiler/spirv/vtn_alu.c create mode 100644 src/compiler/spirv/vtn_cfg.c create mode 100644 src/compiler/spirv/vtn_glsl450.c create mode 100644 src/compiler/spirv/vtn_private.h create mode 100644 src/compiler/spirv/vtn_variables.c (limited to 'src/intel/vulkan') diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index adc7a428469..19735339bca 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -235,10 +235,10 @@ NIR_FILES = \ nir/nir_worklist.h SPIRV_FILES = \ - nir/spirv/nir_spirv.h \ - nir/spirv/spirv_to_nir.c \ - nir/spirv/vtn_alu.c \ - nir/spirv/vtn_cfg.c \ - nir/spirv/vtn_glsl450.c \ - nir/spirv/vtn_private.h \ - nir/spirv/vtn_variables.c + spirv/nir_spirv.h \ + spirv/spirv_to_nir.c \ + spirv/vtn_alu.c \ + spirv/vtn_cfg.c \ + spirv/vtn_glsl450.c \ + spirv/vtn_private.h \ + spirv/vtn_variables.c diff --git a/src/compiler/nir/spirv/GLSL.std.450.h b/src/compiler/nir/spirv/GLSL.std.450.h deleted file mode 100644 index d1c9b5c1d44..00000000000 --- a/src/compiler/nir/spirv/GLSL.std.450.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -** Copyright (c) 2014-2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. 
-** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -const int GLSLstd450Version = 99; -const int GLSLstd450Revision = 3; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/src/compiler/nir/spirv/nir_spirv.h b/src/compiler/nir/spirv/nir_spirv.h deleted file mode 100644 index 500f2cb94df..00000000000 --- a/src/compiler/nir/spirv/nir_spirv.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and 
associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#pragma once - -#ifndef _NIR_SPIRV_H_ -#define _NIR_SPIRV_H_ - -#include "nir/nir.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct nir_spirv_specialization { - uint32_t id; - uint32_t data; -}; - -nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, - struct nir_spirv_specialization *specializations, - unsigned num_specializations, - gl_shader_stage stage, const char *entry_point_name, - const nir_shader_compiler_options *options); - -#ifdef __cplusplus -} -#endif - -#endif /* _NIR_SPIRV_H_ */ diff --git a/src/compiler/nir/spirv/spirv.h b/src/compiler/nir/spirv/spirv.h deleted file mode 100644 index 63bcb2f88dd..00000000000 --- a/src/compiler/nir/spirv/spirv.h +++ /dev/null @@ -1,870 +0,0 @@ -/* -** Copyright (c) 2014-2015 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -/* -** This header is automatically generated by the same tool that creates -** the Binary Section of the SPIR-V specification. 
-*/ - -/* -** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python -** -** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -** -** Some tokens act like mask values, which can be OR'd together, -** while others are mutually exclusive. The mask-like ones have -** "Mask" in their name, and a parallel enum that has the shift -** amount (1 << x) for each corresponding enumerant. -*/ - -#ifndef spirv_H -#define spirv_H - -typedef unsigned int SpvId; - -#define SPV_VERSION 0x10000 -#define SPV_REVISION 2 - -static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010000; -static const unsigned int SpvRevision = 2; -static const unsigned int SpvOpCodeMask = 0xffff; -static const unsigned int SpvWordCountShift = 16; - -typedef enum SpvSourceLanguage_ { - SpvSourceLanguageUnknown = 0, - SpvSourceLanguageESSL = 1, - SpvSourceLanguageGLSL = 2, - SpvSourceLanguageOpenCL_C = 3, - SpvSourceLanguageOpenCL_CPP = 4, -} SpvSourceLanguage; - -typedef enum SpvExecutionModel_ { - SpvExecutionModelVertex = 0, - SpvExecutionModelTessellationControl = 1, - SpvExecutionModelTessellationEvaluation = 2, - SpvExecutionModelGeometry = 3, - SpvExecutionModelFragment = 4, - SpvExecutionModelGLCompute = 5, - SpvExecutionModelKernel = 6, -} SpvExecutionModel; - -typedef enum SpvAddressingModel_ { - SpvAddressingModelLogical = 0, - SpvAddressingModelPhysical32 = 1, - SpvAddressingModelPhysical64 = 2, -} SpvAddressingModel; - -typedef enum SpvMemoryModel_ { - SpvMemoryModelSimple = 0, - SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL = 2, -} SpvMemoryModel; - -typedef enum SpvExecutionMode_ { - SpvExecutionModeInvocations = 0, - SpvExecutionModeSpacingEqual = 1, - SpvExecutionModeSpacingFractionalEven = 2, - SpvExecutionModeSpacingFractionalOdd = 3, - SpvExecutionModeVertexOrderCw = 4, - SpvExecutionModeVertexOrderCcw = 5, - SpvExecutionModePixelCenterInteger = 6, - SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeOriginLowerLeft = 8, - SpvExecutionModeEarlyFragmentTests = 9, - SpvExecutionModePointMode = 10, - SpvExecutionModeXfb = 11, - SpvExecutionModeDepthReplacing = 12, - SpvExecutionModeDepthGreater = 14, - SpvExecutionModeDepthLess = 15, - SpvExecutionModeDepthUnchanged = 16, - SpvExecutionModeLocalSize = 17, - SpvExecutionModeLocalSizeHint = 18, - SpvExecutionModeInputPoints = 19, - SpvExecutionModeInputLines = 20, - SpvExecutionModeInputLinesAdjacency = 21, - SpvExecutionModeTriangles = 22, - SpvExecutionModeInputTrianglesAdjacency = 23, - SpvExecutionModeQuads = 24, - SpvExecutionModeIsolines = 25, - SpvExecutionModeOutputVertices = 26, - SpvExecutionModeOutputPoints = 27, - SpvExecutionModeOutputLineStrip = 28, - SpvExecutionModeOutputTriangleStrip = 29, - SpvExecutionModeVecTypeHint = 30, - SpvExecutionModeContractionOff = 31, -} SpvExecutionMode; - -typedef enum SpvStorageClass_ { - SpvStorageClassUniformConstant = 0, - SpvStorageClassInput = 1, - SpvStorageClassUniform = 2, - SpvStorageClassOutput = 3, - SpvStorageClassWorkgroup = 4, - SpvStorageClassCrossWorkgroup = 5, - SpvStorageClassPrivate = 6, - SpvStorageClassFunction = 7, - SpvStorageClassGeneric = 8, - SpvStorageClassPushConstant = 9, - 
SpvStorageClassAtomicCounter = 10, - SpvStorageClassImage = 11, -} SpvStorageClass; - -typedef enum SpvDim_ { - SpvDim1D = 0, - SpvDim2D = 1, - SpvDim3D = 2, - SpvDimCube = 3, - SpvDimRect = 4, - SpvDimBuffer = 5, - SpvDimSubpassData = 6, -} SpvDim; - -typedef enum SpvSamplerAddressingMode_ { - SpvSamplerAddressingModeNone = 0, - SpvSamplerAddressingModeClampToEdge = 1, - SpvSamplerAddressingModeClamp = 2, - SpvSamplerAddressingModeRepeat = 3, - SpvSamplerAddressingModeRepeatMirrored = 4, -} SpvSamplerAddressingMode; - -typedef enum SpvSamplerFilterMode_ { - SpvSamplerFilterModeNearest = 0, - SpvSamplerFilterModeLinear = 1, -} SpvSamplerFilterMode; - -typedef enum SpvImageFormat_ { - SpvImageFormatUnknown = 0, - SpvImageFormatRgba32f = 1, - SpvImageFormatRgba16f = 2, - SpvImageFormatR32f = 3, - SpvImageFormatRgba8 = 4, - SpvImageFormatRgba8Snorm = 5, - SpvImageFormatRg32f = 6, - SpvImageFormatRg16f = 7, - SpvImageFormatR11fG11fB10f = 8, - SpvImageFormatR16f = 9, - SpvImageFormatRgba16 = 10, - SpvImageFormatRgb10A2 = 11, - SpvImageFormatRg16 = 12, - SpvImageFormatRg8 = 13, - SpvImageFormatR16 = 14, - SpvImageFormatR8 = 15, - SpvImageFormatRgba16Snorm = 16, - SpvImageFormatRg16Snorm = 17, - SpvImageFormatRg8Snorm = 18, - SpvImageFormatR16Snorm = 19, - SpvImageFormatR8Snorm = 20, - SpvImageFormatRgba32i = 21, - SpvImageFormatRgba16i = 22, - SpvImageFormatRgba8i = 23, - SpvImageFormatR32i = 24, - SpvImageFormatRg32i = 25, - SpvImageFormatRg16i = 26, - SpvImageFormatRg8i = 27, - SpvImageFormatR16i = 28, - SpvImageFormatR8i = 29, - SpvImageFormatRgba32ui = 30, - SpvImageFormatRgba16ui = 31, - SpvImageFormatRgba8ui = 32, - SpvImageFormatR32ui = 33, - SpvImageFormatRgb10a2ui = 34, - SpvImageFormatRg32ui = 35, - SpvImageFormatRg16ui = 36, - SpvImageFormatRg8ui = 37, - SpvImageFormatR16ui = 38, - SpvImageFormatR8ui = 39, -} SpvImageFormat; - -typedef enum SpvImageChannelOrder_ { - SpvImageChannelOrderR = 0, - SpvImageChannelOrderA = 1, - SpvImageChannelOrderRG = 2, - SpvImageChannelOrderRA = 3, - SpvImageChannelOrderRGB = 4, - SpvImageChannelOrderRGBA = 5, - SpvImageChannelOrderBGRA = 6, - SpvImageChannelOrderARGB = 7, - SpvImageChannelOrderIntensity = 8, - SpvImageChannelOrderLuminance = 9, - SpvImageChannelOrderRx = 10, - SpvImageChannelOrderRGx = 11, - SpvImageChannelOrderRGBx = 12, - SpvImageChannelOrderDepth = 13, - SpvImageChannelOrderDepthStencil = 14, - SpvImageChannelOrdersRGB = 15, - SpvImageChannelOrdersRGBx = 16, - SpvImageChannelOrdersRGBA = 17, - SpvImageChannelOrdersBGRA = 18, -} SpvImageChannelOrder; - -typedef enum SpvImageChannelDataType_ { - SpvImageChannelDataTypeSnormInt8 = 0, - SpvImageChannelDataTypeSnormInt16 = 1, - SpvImageChannelDataTypeUnormInt8 = 2, - SpvImageChannelDataTypeUnormInt16 = 3, - SpvImageChannelDataTypeUnormShort565 = 4, - SpvImageChannelDataTypeUnormShort555 = 5, - SpvImageChannelDataTypeUnormInt101010 = 6, - SpvImageChannelDataTypeSignedInt8 = 7, - SpvImageChannelDataTypeSignedInt16 = 8, - SpvImageChannelDataTypeSignedInt32 = 9, - SpvImageChannelDataTypeUnsignedInt8 = 10, - SpvImageChannelDataTypeUnsignedInt16 = 11, - SpvImageChannelDataTypeUnsignedInt32 = 12, - SpvImageChannelDataTypeHalfFloat = 13, - SpvImageChannelDataTypeFloat = 14, - SpvImageChannelDataTypeUnormInt24 = 15, - SpvImageChannelDataTypeUnormInt101010_2 = 16, -} SpvImageChannelDataType; - -typedef enum SpvImageOperandsShift_ { - SpvImageOperandsBiasShift = 0, - SpvImageOperandsLodShift = 1, - SpvImageOperandsGradShift = 2, - SpvImageOperandsConstOffsetShift = 3, - SpvImageOperandsOffsetShift 
= 4, - SpvImageOperandsConstOffsetsShift = 5, - SpvImageOperandsSampleShift = 6, - SpvImageOperandsMinLodShift = 7, -} SpvImageOperandsShift; - -typedef enum SpvImageOperandsMask_ { - SpvImageOperandsMaskNone = 0, - SpvImageOperandsBiasMask = 0x00000001, - SpvImageOperandsLodMask = 0x00000002, - SpvImageOperandsGradMask = 0x00000004, - SpvImageOperandsConstOffsetMask = 0x00000008, - SpvImageOperandsOffsetMask = 0x00000010, - SpvImageOperandsConstOffsetsMask = 0x00000020, - SpvImageOperandsSampleMask = 0x00000040, - SpvImageOperandsMinLodMask = 0x00000080, -} SpvImageOperandsMask; - -typedef enum SpvFPFastMathModeShift_ { - SpvFPFastMathModeNotNaNShift = 0, - SpvFPFastMathModeNotInfShift = 1, - SpvFPFastMathModeNSZShift = 2, - SpvFPFastMathModeAllowRecipShift = 3, - SpvFPFastMathModeFastShift = 4, -} SpvFPFastMathModeShift; - -typedef enum SpvFPFastMathModeMask_ { - SpvFPFastMathModeMaskNone = 0, - SpvFPFastMathModeNotNaNMask = 0x00000001, - SpvFPFastMathModeNotInfMask = 0x00000002, - SpvFPFastMathModeNSZMask = 0x00000004, - SpvFPFastMathModeAllowRecipMask = 0x00000008, - SpvFPFastMathModeFastMask = 0x00000010, -} SpvFPFastMathModeMask; - -typedef enum SpvFPRoundingMode_ { - SpvFPRoundingModeRTE = 0, - SpvFPRoundingModeRTZ = 1, - SpvFPRoundingModeRTP = 2, - SpvFPRoundingModeRTN = 3, -} SpvFPRoundingMode; - -typedef enum SpvLinkageType_ { - SpvLinkageTypeExport = 0, - SpvLinkageTypeImport = 1, -} SpvLinkageType; - -typedef enum SpvAccessQualifier_ { - SpvAccessQualifierReadOnly = 0, - SpvAccessQualifierWriteOnly = 1, - SpvAccessQualifierReadWrite = 2, -} SpvAccessQualifier; - -typedef enum SpvFunctionParameterAttribute_ { - SpvFunctionParameterAttributeZext = 0, - SpvFunctionParameterAttributeSext = 1, - SpvFunctionParameterAttributeByVal = 2, - SpvFunctionParameterAttributeSret = 3, - SpvFunctionParameterAttributeNoAlias = 4, - SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeNoWrite = 6, - SpvFunctionParameterAttributeNoReadWrite = 7, -} SpvFunctionParameterAttribute; - -typedef enum SpvDecoration_ { - SpvDecorationRelaxedPrecision = 0, - SpvDecorationSpecId = 1, - SpvDecorationBlock = 2, - SpvDecorationBufferBlock = 3, - SpvDecorationRowMajor = 4, - SpvDecorationColMajor = 5, - SpvDecorationArrayStride = 6, - SpvDecorationMatrixStride = 7, - SpvDecorationGLSLShared = 8, - SpvDecorationGLSLPacked = 9, - SpvDecorationCPacked = 10, - SpvDecorationBuiltIn = 11, - SpvDecorationNoPerspective = 13, - SpvDecorationFlat = 14, - SpvDecorationPatch = 15, - SpvDecorationCentroid = 16, - SpvDecorationSample = 17, - SpvDecorationInvariant = 18, - SpvDecorationRestrict = 19, - SpvDecorationAliased = 20, - SpvDecorationVolatile = 21, - SpvDecorationConstant = 22, - SpvDecorationCoherent = 23, - SpvDecorationNonWritable = 24, - SpvDecorationNonReadable = 25, - SpvDecorationUniform = 26, - SpvDecorationSaturatedConversion = 28, - SpvDecorationStream = 29, - SpvDecorationLocation = 30, - SpvDecorationComponent = 31, - SpvDecorationIndex = 32, - SpvDecorationBinding = 33, - SpvDecorationDescriptorSet = 34, - SpvDecorationOffset = 35, - SpvDecorationXfbBuffer = 36, - SpvDecorationXfbStride = 37, - SpvDecorationFuncParamAttr = 38, - SpvDecorationFPRoundingMode = 39, - SpvDecorationFPFastMathMode = 40, - SpvDecorationLinkageAttributes = 41, - SpvDecorationNoContraction = 42, - SpvDecorationInputAttachmentIndex = 43, - SpvDecorationAlignment = 44, -} SpvDecoration; - -typedef enum SpvBuiltIn_ { - SpvBuiltInPosition = 0, - SpvBuiltInPointSize = 1, - SpvBuiltInClipDistance = 3, - 
SpvBuiltInCullDistance = 4, - SpvBuiltInVertexId = 5, - SpvBuiltInInstanceId = 6, - SpvBuiltInPrimitiveId = 7, - SpvBuiltInInvocationId = 8, - SpvBuiltInLayer = 9, - SpvBuiltInViewportIndex = 10, - SpvBuiltInTessLevelOuter = 11, - SpvBuiltInTessLevelInner = 12, - SpvBuiltInTessCoord = 13, - SpvBuiltInPatchVertices = 14, - SpvBuiltInFragCoord = 15, - SpvBuiltInPointCoord = 16, - SpvBuiltInFrontFacing = 17, - SpvBuiltInSampleId = 18, - SpvBuiltInSamplePosition = 19, - SpvBuiltInSampleMask = 20, - SpvBuiltInFragDepth = 22, - SpvBuiltInHelperInvocation = 23, - SpvBuiltInNumWorkgroups = 24, - SpvBuiltInWorkgroupSize = 25, - SpvBuiltInWorkgroupId = 26, - SpvBuiltInLocalInvocationId = 27, - SpvBuiltInGlobalInvocationId = 28, - SpvBuiltInLocalInvocationIndex = 29, - SpvBuiltInWorkDim = 30, - SpvBuiltInGlobalSize = 31, - SpvBuiltInEnqueuedWorkgroupSize = 32, - SpvBuiltInGlobalOffset = 33, - SpvBuiltInGlobalLinearId = 34, - SpvBuiltInSubgroupSize = 36, - SpvBuiltInSubgroupMaxSize = 37, - SpvBuiltInNumSubgroups = 38, - SpvBuiltInNumEnqueuedSubgroups = 39, - SpvBuiltInSubgroupId = 40, - SpvBuiltInSubgroupLocalInvocationId = 41, - SpvBuiltInVertexIndex = 42, - SpvBuiltInInstanceIndex = 43, -} SpvBuiltIn; - -typedef enum SpvSelectionControlShift_ { - SpvSelectionControlFlattenShift = 0, - SpvSelectionControlDontFlattenShift = 1, -} SpvSelectionControlShift; - -typedef enum SpvSelectionControlMask_ { - SpvSelectionControlMaskNone = 0, - SpvSelectionControlFlattenMask = 0x00000001, - SpvSelectionControlDontFlattenMask = 0x00000002, -} SpvSelectionControlMask; - -typedef enum SpvLoopControlShift_ { - SpvLoopControlUnrollShift = 0, - SpvLoopControlDontUnrollShift = 1, -} SpvLoopControlShift; - -typedef enum SpvLoopControlMask_ { - SpvLoopControlMaskNone = 0, - SpvLoopControlUnrollMask = 0x00000001, - SpvLoopControlDontUnrollMask = 0x00000002, -} SpvLoopControlMask; - -typedef enum SpvFunctionControlShift_ { - SpvFunctionControlInlineShift = 0, - SpvFunctionControlDontInlineShift = 1, - SpvFunctionControlPureShift = 2, - SpvFunctionControlConstShift = 3, -} SpvFunctionControlShift; - -typedef enum SpvFunctionControlMask_ { - SpvFunctionControlMaskNone = 0, - SpvFunctionControlInlineMask = 0x00000001, - SpvFunctionControlDontInlineMask = 0x00000002, - SpvFunctionControlPureMask = 0x00000004, - SpvFunctionControlConstMask = 0x00000008, -} SpvFunctionControlMask; - -typedef enum SpvMemorySemanticsShift_ { - SpvMemorySemanticsAcquireShift = 1, - SpvMemorySemanticsReleaseShift = 2, - SpvMemorySemanticsAcquireReleaseShift = 3, - SpvMemorySemanticsSequentiallyConsistentShift = 4, - SpvMemorySemanticsUniformMemoryShift = 6, - SpvMemorySemanticsSubgroupMemoryShift = 7, - SpvMemorySemanticsWorkgroupMemoryShift = 8, - SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, - SpvMemorySemanticsAtomicCounterMemoryShift = 10, - SpvMemorySemanticsImageMemoryShift = 11, -} SpvMemorySemanticsShift; - -typedef enum SpvMemorySemanticsMask_ { - SpvMemorySemanticsMaskNone = 0, - SpvMemorySemanticsAcquireMask = 0x00000002, - SpvMemorySemanticsReleaseMask = 0x00000004, - SpvMemorySemanticsAcquireReleaseMask = 0x00000008, - SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, - SpvMemorySemanticsUniformMemoryMask = 0x00000040, - SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, - SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, - SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, - SpvMemorySemanticsImageMemoryMask = 0x00000800, -} SpvMemorySemanticsMask; - -typedef 
enum SpvMemoryAccessShift_ { - SpvMemoryAccessVolatileShift = 0, - SpvMemoryAccessAlignedShift = 1, - SpvMemoryAccessNontemporalShift = 2, -} SpvMemoryAccessShift; - -typedef enum SpvMemoryAccessMask_ { - SpvMemoryAccessMaskNone = 0, - SpvMemoryAccessVolatileMask = 0x00000001, - SpvMemoryAccessAlignedMask = 0x00000002, - SpvMemoryAccessNontemporalMask = 0x00000004, -} SpvMemoryAccessMask; - -typedef enum SpvScope_ { - SpvScopeCrossDevice = 0, - SpvScopeDevice = 1, - SpvScopeWorkgroup = 2, - SpvScopeSubgroup = 3, - SpvScopeInvocation = 4, -} SpvScope; - -typedef enum SpvGroupOperation_ { - SpvGroupOperationReduce = 0, - SpvGroupOperationInclusiveScan = 1, - SpvGroupOperationExclusiveScan = 2, -} SpvGroupOperation; - -typedef enum SpvKernelEnqueueFlags_ { - SpvKernelEnqueueFlagsNoWait = 0, - SpvKernelEnqueueFlagsWaitKernel = 1, - SpvKernelEnqueueFlagsWaitWorkGroup = 2, -} SpvKernelEnqueueFlags; - -typedef enum SpvKernelProfilingInfoShift_ { - SpvKernelProfilingInfoCmdExecTimeShift = 0, -} SpvKernelProfilingInfoShift; - -typedef enum SpvKernelProfilingInfoMask_ { - SpvKernelProfilingInfoMaskNone = 0, - SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, -} SpvKernelProfilingInfoMask; - -typedef enum SpvCapability_ { - SpvCapabilityMatrix = 0, - SpvCapabilityShader = 1, - SpvCapabilityGeometry = 2, - SpvCapabilityTessellation = 3, - SpvCapabilityAddresses = 4, - SpvCapabilityLinkage = 5, - SpvCapabilityKernel = 6, - SpvCapabilityVector16 = 7, - SpvCapabilityFloat16Buffer = 8, - SpvCapabilityFloat16 = 9, - SpvCapabilityFloat64 = 10, - SpvCapabilityInt64 = 11, - SpvCapabilityInt64Atomics = 12, - SpvCapabilityImageBasic = 13, - SpvCapabilityImageReadWrite = 14, - SpvCapabilityImageMipmap = 15, - SpvCapabilityPipes = 17, - SpvCapabilityGroups = 18, - SpvCapabilityDeviceEnqueue = 19, - SpvCapabilityLiteralSampler = 20, - SpvCapabilityAtomicStorage = 21, - SpvCapabilityInt16 = 22, - SpvCapabilityTessellationPointSize = 23, - SpvCapabilityGeometryPointSize = 24, - SpvCapabilityImageGatherExtended = 25, - SpvCapabilityStorageImageMultisample = 27, - SpvCapabilityUniformBufferArrayDynamicIndexing = 28, - SpvCapabilitySampledImageArrayDynamicIndexing = 29, - SpvCapabilityStorageBufferArrayDynamicIndexing = 30, - SpvCapabilityStorageImageArrayDynamicIndexing = 31, - SpvCapabilityClipDistance = 32, - SpvCapabilityCullDistance = 33, - SpvCapabilityImageCubeArray = 34, - SpvCapabilitySampleRateShading = 35, - SpvCapabilityImageRect = 36, - SpvCapabilitySampledRect = 37, - SpvCapabilityGenericPointer = 38, - SpvCapabilityInt8 = 39, - SpvCapabilityInputAttachment = 40, - SpvCapabilitySparseResidency = 41, - SpvCapabilityMinLod = 42, - SpvCapabilitySampled1D = 43, - SpvCapabilityImage1D = 44, - SpvCapabilitySampledCubeArray = 45, - SpvCapabilitySampledBuffer = 46, - SpvCapabilityImageBuffer = 47, - SpvCapabilityImageMSArray = 48, - SpvCapabilityStorageImageExtendedFormats = 49, - SpvCapabilityImageQuery = 50, - SpvCapabilityDerivativeControl = 51, - SpvCapabilityInterpolationFunction = 52, - SpvCapabilityTransformFeedback = 53, - SpvCapabilityGeometryStreams = 54, - SpvCapabilityStorageImageReadWithoutFormat = 55, - SpvCapabilityStorageImageWriteWithoutFormat = 56, - SpvCapabilityMultiViewport = 57, -} SpvCapability; - -typedef enum SpvOp_ { - SpvOpNop = 0, - SpvOpUndef = 1, - SpvOpSourceContinued = 2, - SpvOpSource = 3, - SpvOpSourceExtension = 4, - SpvOpName = 5, - SpvOpMemberName = 6, - SpvOpString = 7, - SpvOpLine = 8, - SpvOpExtension = 10, - SpvOpExtInstImport = 11, - SpvOpExtInst = 12, - 
SpvOpMemoryModel = 14, - SpvOpEntryPoint = 15, - SpvOpExecutionMode = 16, - SpvOpCapability = 17, - SpvOpTypeVoid = 19, - SpvOpTypeBool = 20, - SpvOpTypeInt = 21, - SpvOpTypeFloat = 22, - SpvOpTypeVector = 23, - SpvOpTypeMatrix = 24, - SpvOpTypeImage = 25, - SpvOpTypeSampler = 26, - SpvOpTypeSampledImage = 27, - SpvOpTypeArray = 28, - SpvOpTypeRuntimeArray = 29, - SpvOpTypeStruct = 30, - SpvOpTypeOpaque = 31, - SpvOpTypePointer = 32, - SpvOpTypeFunction = 33, - SpvOpTypeEvent = 34, - SpvOpTypeDeviceEvent = 35, - SpvOpTypeReserveId = 36, - SpvOpTypeQueue = 37, - SpvOpTypePipe = 38, - SpvOpTypeForwardPointer = 39, - SpvOpConstantTrue = 41, - SpvOpConstantFalse = 42, - SpvOpConstant = 43, - SpvOpConstantComposite = 44, - SpvOpConstantSampler = 45, - SpvOpConstantNull = 46, - SpvOpSpecConstantTrue = 48, - SpvOpSpecConstantFalse = 49, - SpvOpSpecConstant = 50, - SpvOpSpecConstantComposite = 51, - SpvOpSpecConstantOp = 52, - SpvOpFunction = 54, - SpvOpFunctionParameter = 55, - SpvOpFunctionEnd = 56, - SpvOpFunctionCall = 57, - SpvOpVariable = 59, - SpvOpImageTexelPointer = 60, - SpvOpLoad = 61, - SpvOpStore = 62, - SpvOpCopyMemory = 63, - SpvOpCopyMemorySized = 64, - SpvOpAccessChain = 65, - SpvOpInBoundsAccessChain = 66, - SpvOpPtrAccessChain = 67, - SpvOpArrayLength = 68, - SpvOpGenericPtrMemSemantics = 69, - SpvOpInBoundsPtrAccessChain = 70, - SpvOpDecorate = 71, - SpvOpMemberDecorate = 72, - SpvOpDecorationGroup = 73, - SpvOpGroupDecorate = 74, - SpvOpGroupMemberDecorate = 75, - SpvOpVectorExtractDynamic = 77, - SpvOpVectorInsertDynamic = 78, - SpvOpVectorShuffle = 79, - SpvOpCompositeConstruct = 80, - SpvOpCompositeExtract = 81, - SpvOpCompositeInsert = 82, - SpvOpCopyObject = 83, - SpvOpTranspose = 84, - SpvOpSampledImage = 86, - SpvOpImageSampleImplicitLod = 87, - SpvOpImageSampleExplicitLod = 88, - SpvOpImageSampleDrefImplicitLod = 89, - SpvOpImageSampleDrefExplicitLod = 90, - SpvOpImageSampleProjImplicitLod = 91, - SpvOpImageSampleProjExplicitLod = 92, - SpvOpImageSampleProjDrefImplicitLod = 93, - SpvOpImageSampleProjDrefExplicitLod = 94, - SpvOpImageFetch = 95, - SpvOpImageGather = 96, - SpvOpImageDrefGather = 97, - SpvOpImageRead = 98, - SpvOpImageWrite = 99, - SpvOpImage = 100, - SpvOpImageQueryFormat = 101, - SpvOpImageQueryOrder = 102, - SpvOpImageQuerySizeLod = 103, - SpvOpImageQuerySize = 104, - SpvOpImageQueryLod = 105, - SpvOpImageQueryLevels = 106, - SpvOpImageQuerySamples = 107, - SpvOpConvertFToU = 109, - SpvOpConvertFToS = 110, - SpvOpConvertSToF = 111, - SpvOpConvertUToF = 112, - SpvOpUConvert = 113, - SpvOpSConvert = 114, - SpvOpFConvert = 115, - SpvOpQuantizeToF16 = 116, - SpvOpConvertPtrToU = 117, - SpvOpSatConvertSToU = 118, - SpvOpSatConvertUToS = 119, - SpvOpConvertUToPtr = 120, - SpvOpPtrCastToGeneric = 121, - SpvOpGenericCastToPtr = 122, - SpvOpGenericCastToPtrExplicit = 123, - SpvOpBitcast = 124, - SpvOpSNegate = 126, - SpvOpFNegate = 127, - SpvOpIAdd = 128, - SpvOpFAdd = 129, - SpvOpISub = 130, - SpvOpFSub = 131, - SpvOpIMul = 132, - SpvOpFMul = 133, - SpvOpUDiv = 134, - SpvOpSDiv = 135, - SpvOpFDiv = 136, - SpvOpUMod = 137, - SpvOpSRem = 138, - SpvOpSMod = 139, - SpvOpFRem = 140, - SpvOpFMod = 141, - SpvOpVectorTimesScalar = 142, - SpvOpMatrixTimesScalar = 143, - SpvOpVectorTimesMatrix = 144, - SpvOpMatrixTimesVector = 145, - SpvOpMatrixTimesMatrix = 146, - SpvOpOuterProduct = 147, - SpvOpDot = 148, - SpvOpIAddCarry = 149, - SpvOpISubBorrow = 150, - SpvOpUMulExtended = 151, - SpvOpSMulExtended = 152, - SpvOpAny = 154, - SpvOpAll = 155, - SpvOpIsNan = 156, - 
SpvOpIsInf = 157, - SpvOpIsFinite = 158, - SpvOpIsNormal = 159, - SpvOpSignBitSet = 160, - SpvOpLessOrGreater = 161, - SpvOpOrdered = 162, - SpvOpUnordered = 163, - SpvOpLogicalEqual = 164, - SpvOpLogicalNotEqual = 165, - SpvOpLogicalOr = 166, - SpvOpLogicalAnd = 167, - SpvOpLogicalNot = 168, - SpvOpSelect = 169, - SpvOpIEqual = 170, - SpvOpINotEqual = 171, - SpvOpUGreaterThan = 172, - SpvOpSGreaterThan = 173, - SpvOpUGreaterThanEqual = 174, - SpvOpSGreaterThanEqual = 175, - SpvOpULessThan = 176, - SpvOpSLessThan = 177, - SpvOpULessThanEqual = 178, - SpvOpSLessThanEqual = 179, - SpvOpFOrdEqual = 180, - SpvOpFUnordEqual = 181, - SpvOpFOrdNotEqual = 182, - SpvOpFUnordNotEqual = 183, - SpvOpFOrdLessThan = 184, - SpvOpFUnordLessThan = 185, - SpvOpFOrdGreaterThan = 186, - SpvOpFUnordGreaterThan = 187, - SpvOpFOrdLessThanEqual = 188, - SpvOpFUnordLessThanEqual = 189, - SpvOpFOrdGreaterThanEqual = 190, - SpvOpFUnordGreaterThanEqual = 191, - SpvOpShiftRightLogical = 194, - SpvOpShiftRightArithmetic = 195, - SpvOpShiftLeftLogical = 196, - SpvOpBitwiseOr = 197, - SpvOpBitwiseXor = 198, - SpvOpBitwiseAnd = 199, - SpvOpNot = 200, - SpvOpBitFieldInsert = 201, - SpvOpBitFieldSExtract = 202, - SpvOpBitFieldUExtract = 203, - SpvOpBitReverse = 204, - SpvOpBitCount = 205, - SpvOpDPdx = 207, - SpvOpDPdy = 208, - SpvOpFwidth = 209, - SpvOpDPdxFine = 210, - SpvOpDPdyFine = 211, - SpvOpFwidthFine = 212, - SpvOpDPdxCoarse = 213, - SpvOpDPdyCoarse = 214, - SpvOpFwidthCoarse = 215, - SpvOpEmitVertex = 218, - SpvOpEndPrimitive = 219, - SpvOpEmitStreamVertex = 220, - SpvOpEndStreamPrimitive = 221, - SpvOpControlBarrier = 224, - SpvOpMemoryBarrier = 225, - SpvOpAtomicLoad = 227, - SpvOpAtomicStore = 228, - SpvOpAtomicExchange = 229, - SpvOpAtomicCompareExchange = 230, - SpvOpAtomicCompareExchangeWeak = 231, - SpvOpAtomicIIncrement = 232, - SpvOpAtomicIDecrement = 233, - SpvOpAtomicIAdd = 234, - SpvOpAtomicISub = 235, - SpvOpAtomicSMin = 236, - SpvOpAtomicUMin = 237, - SpvOpAtomicSMax = 238, - SpvOpAtomicUMax = 239, - SpvOpAtomicAnd = 240, - SpvOpAtomicOr = 241, - SpvOpAtomicXor = 242, - SpvOpPhi = 245, - SpvOpLoopMerge = 246, - SpvOpSelectionMerge = 247, - SpvOpLabel = 248, - SpvOpBranch = 249, - SpvOpBranchConditional = 250, - SpvOpSwitch = 251, - SpvOpKill = 252, - SpvOpReturn = 253, - SpvOpReturnValue = 254, - SpvOpUnreachable = 255, - SpvOpLifetimeStart = 256, - SpvOpLifetimeStop = 257, - SpvOpGroupAsyncCopy = 259, - SpvOpGroupWaitEvents = 260, - SpvOpGroupAll = 261, - SpvOpGroupAny = 262, - SpvOpGroupBroadcast = 263, - SpvOpGroupIAdd = 264, - SpvOpGroupFAdd = 265, - SpvOpGroupFMin = 266, - SpvOpGroupUMin = 267, - SpvOpGroupSMin = 268, - SpvOpGroupFMax = 269, - SpvOpGroupUMax = 270, - SpvOpGroupSMax = 271, - SpvOpReadPipe = 274, - SpvOpWritePipe = 275, - SpvOpReservedReadPipe = 276, - SpvOpReservedWritePipe = 277, - SpvOpReserveReadPipePackets = 278, - SpvOpReserveWritePipePackets = 279, - SpvOpCommitReadPipe = 280, - SpvOpCommitWritePipe = 281, - SpvOpIsValidReserveId = 282, - SpvOpGetNumPipePackets = 283, - SpvOpGetMaxPipePackets = 284, - SpvOpGroupReserveReadPipePackets = 285, - SpvOpGroupReserveWritePipePackets = 286, - SpvOpGroupCommitReadPipe = 287, - SpvOpGroupCommitWritePipe = 288, - SpvOpEnqueueMarker = 291, - SpvOpEnqueueKernel = 292, - SpvOpGetKernelNDrangeSubGroupCount = 293, - SpvOpGetKernelNDrangeMaxSubGroupSize = 294, - SpvOpGetKernelWorkGroupSize = 295, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, - SpvOpRetainEvent = 297, - SpvOpReleaseEvent = 298, - SpvOpCreateUserEvent = 299, - 
SpvOpIsValidEvent = 300, - SpvOpSetUserEventStatus = 301, - SpvOpCaptureEventProfilingInfo = 302, - SpvOpGetDefaultQueue = 303, - SpvOpBuildNDRange = 304, - SpvOpImageSparseSampleImplicitLod = 305, - SpvOpImageSparseSampleExplicitLod = 306, - SpvOpImageSparseSampleDrefImplicitLod = 307, - SpvOpImageSparseSampleDrefExplicitLod = 308, - SpvOpImageSparseSampleProjImplicitLod = 309, - SpvOpImageSparseSampleProjExplicitLod = 310, - SpvOpImageSparseSampleProjDrefImplicitLod = 311, - SpvOpImageSparseSampleProjDrefExplicitLod = 312, - SpvOpImageSparseFetch = 313, - SpvOpImageSparseGather = 314, - SpvOpImageSparseDrefGather = 315, - SpvOpImageSparseTexelsResident = 316, - SpvOpNoLine = 317, - SpvOpAtomicFlagTestAndSet = 318, - SpvOpAtomicFlagClear = 319, -} SpvOp; - -#endif // #ifndef spirv_H - diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c deleted file mode 100644 index 99514b49650..00000000000 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ /dev/null @@ -1,2710 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "vtn_private.h" -#include "nir/nir_vla.h" -#include "nir/nir_control_flow.h" -#include "nir/nir_constant_expressions.h" - -static struct vtn_ssa_value * -vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (glsl_type_is_vector_or_scalar(type)) { - unsigned num_components = glsl_get_vector_elements(val->type); - unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(val->type)); - val->def = nir_ssa_undef(&b->nb, num_components, bit_size); - } else { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - if (glsl_type_is_matrix(type)) { - const struct glsl_type *elem_type = - glsl_vector_type(glsl_get_base_type(type), - glsl_get_vector_elements(type)); - - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } else if (glsl_type_is_array(type)) { - const struct glsl_type *elem_type = glsl_get_array_element(type); - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } else { - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *elem_type = glsl_get_struct_field(type, i); - val->elems[i] = vtn_undef_ssa_value(b, elem_type); - } - } - } - - return val; -} - -static struct vtn_ssa_value * -vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, - const struct glsl_type *type) -{ - struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); - - if (entry) - return entry->data; - - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - if (glsl_type_is_vector_or_scalar(type)) { - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components, 32); - - for (unsigned i = 0; i < num_components; i++) - load->value.u32[i] = constant->value.u[i]; - - nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); - val->def = &load->def; - } else { - assert(glsl_type_is_matrix(type)); - unsigned rows = glsl_get_vector_elements(val->type); - unsigned columns = glsl_get_matrix_columns(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); - - for (unsigned i = 0; i < columns; i++) { - struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); - col_val->type = glsl_get_column_type(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, rows, 32); - - for (unsigned j = 0; j < rows; j++) - load->value.u32[j] = constant->value.u[rows * i + j]; - - nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); - col_val->def = &load->def; - - val->elems[i] = col_val; - } - } - break; - - case GLSL_TYPE_ARRAY: { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - const struct glsl_type *elem_type = glsl_get_array_element(val->type); - for (unsigned i = 0; i < elems; i++) - val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], - elem_type); - break; - } - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(val->type); - val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { - const struct 
glsl_type *elem_type = - glsl_get_struct_field(val->type, i); - val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], - elem_type); - } - break; - } - - default: - unreachable("bad constant type"); - } - - return val; -} - -struct vtn_ssa_value * -vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - switch (val->value_type) { - case vtn_value_type_undef: - return vtn_undef_ssa_value(b, val->type->type); - - case vtn_value_type_constant: - return vtn_const_ssa_value(b, val->constant, val->const_type); - - case vtn_value_type_ssa: - return val->ssa; - - case vtn_value_type_access_chain: - /* This is needed for function parameters */ - return vtn_variable_load(b, val->access_chain); - - default: - unreachable("Invalid type for an SSA value"); - } -} - -static char * -vtn_string_literal(struct vtn_builder *b, const uint32_t *words, - unsigned word_count, unsigned *words_used) -{ - char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); - if (words_used) { - /* Ammount of space taken by the string (including the null) */ - unsigned len = strlen(dup) + 1; - *words_used = DIV_ROUND_UP(len, sizeof(*words)); - } - return dup; -} - -const uint32_t * -vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, - const uint32_t *end, vtn_instruction_handler handler) -{ - b->file = NULL; - b->line = -1; - b->col = -1; - - const uint32_t *w = start; - while (w < end) { - SpvOp opcode = w[0] & SpvOpCodeMask; - unsigned count = w[0] >> SpvWordCountShift; - assert(count >= 1 && w + count <= end); - - switch (opcode) { - case SpvOpNop: - break; /* Do nothing */ - - case SpvOpLine: - b->file = vtn_value(b, w[1], vtn_value_type_string)->str; - b->line = w[2]; - b->col = w[3]; - break; - - case SpvOpNoLine: - b->file = NULL; - b->line = -1; - b->col = -1; - break; - - default: - if (!handler(b, opcode, w, count)) - return w; - break; - } - - w += count; - } - assert(w == end); - return w; -} - -static void -vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpExtInstImport: { - struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); - if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { - val->ext_handler = vtn_handle_glsl450_instruction; - } else { - assert(!"Unsupported extension"); - } - break; - } - - case SpvOpExtInst: { - struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); - bool handled = val->ext_handler(b, w[4], w, count); - (void)handled; - assert(handled); - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -static void -_foreach_decoration_helper(struct vtn_builder *b, - struct vtn_value *base_value, - int parent_member, - struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data) -{ - for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { - int member; - if (dec->scope == VTN_DEC_DECORATION) { - member = parent_member; - } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { - assert(parent_member == -1); - member = dec->scope - VTN_DEC_STRUCT_MEMBER0; - } else { - /* Not a decoration */ - continue; - } - - if (dec->group) { - assert(dec->group->value_type == vtn_value_type_decoration_group); - _foreach_decoration_helper(b, base_value, member, dec->group, - cb, data); - } else { - cb(b, base_value, member, dec, data); - } - } -} - -/** Iterates (recursively if needed) over all of the decorations on a value - * - * This function iterates 
over all of the decorations applied to a given - * value. If it encounters a decoration group, it recurses into the group - * and iterates over all of those decorations as well. - */ -void -vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data) -{ - _foreach_decoration_helper(b, value, -1, value, cb, data); -} - -void -vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, - vtn_execution_mode_foreach_cb cb, void *data) -{ - for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { - if (dec->scope != VTN_DEC_EXECUTION_MODE) - continue; - - assert(dec->group == NULL); - cb(b, value, dec, data); - } -} - -static void -vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - const uint32_t *w_end = w + count; - const uint32_t target = w[1]; - w += 2; - - switch (opcode) { - case SpvOpDecorationGroup: - vtn_push_value(b, target, vtn_value_type_decoration_group); - break; - - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpExecutionMode: { - struct vtn_value *val = &b->values[target]; - - struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - switch (opcode) { - case SpvOpDecorate: - dec->scope = VTN_DEC_DECORATION; - break; - case SpvOpMemberDecorate: - dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); - break; - case SpvOpExecutionMode: - dec->scope = VTN_DEC_EXECUTION_MODE; - break; - default: - unreachable("Invalid decoration opcode"); - } - dec->decoration = *(w++); - dec->literals = w; - - /* Link into the list */ - dec->next = val->decoration; - val->decoration = dec; - break; - } - - case SpvOpGroupMemberDecorate: - case SpvOpGroupDecorate: { - struct vtn_value *group = - vtn_value(b, target, vtn_value_type_decoration_group); - - for (; w < w_end; w++) { - struct vtn_value *val = vtn_untyped_value(b, *w); - struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); - - dec->group = group; - if (opcode == SpvOpGroupDecorate) { - dec->scope = VTN_DEC_DECORATION; - } else { - dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w); - } - - /* Link into the list */ - dec->next = val->decoration; - val->decoration = dec; - } - break; - } - - default: - unreachable("Unhandled opcode"); - } -} - -struct member_decoration_ctx { - unsigned num_fields; - struct glsl_struct_field *fields; - struct vtn_type *type; -}; - -/* does a shallow copy of a vtn_type */ - -static struct vtn_type * -vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) -{ - struct vtn_type *dest = ralloc(b, struct vtn_type); - dest->type = src->type; - dest->is_builtin = src->is_builtin; - if (src->is_builtin) - dest->builtin = src->builtin; - - if (!glsl_type_is_scalar(src->type)) { - switch (glsl_get_base_type(src->type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_ARRAY: - dest->row_major = src->row_major; - dest->stride = src->stride; - dest->array_element = src->array_element; - break; - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(src->type); - - dest->members = ralloc_array(b, struct vtn_type *, elems); - memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); - - dest->offsets = ralloc_array(b, unsigned, elems); - memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); - break; - } - - default: - unreachable("unhandled type"); - } - } - - return dest; -} - -static struct vtn_type * -mutable_matrix_member(struct vtn_builder *b, 
struct vtn_type *type, int member) -{ - type->members[member] = vtn_type_copy(b, type->members[member]); - type = type->members[member]; - - /* We may have an array of matrices.... Oh, joy! */ - while (glsl_type_is_array(type->type)) { - type->array_element = vtn_type_copy(b, type->array_element); - type = type->array_element; - } - - assert(glsl_type_is_matrix(type->type)); - - return type; -} - -static void -struct_member_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_ctx) -{ - struct member_decoration_ctx *ctx = void_ctx; - - if (member < 0) - return; - - assert(member < ctx->num_fields); - - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. */ - case SpvDecorationNoPerspective: - ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - ctx->fields[member].centroid = true; - break; - case SpvDecorationSample: - ctx->fields[member].sample = true; - break; - case SpvDecorationLocation: - ctx->fields[member].location = dec->literals[0]; - break; - case SpvDecorationBuiltIn: - ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); - ctx->type->members[member]->is_builtin = true; - ctx->type->members[member]->builtin = dec->literals[0]; - ctx->type->builtin_block = true; - break; - case SpvDecorationOffset: - ctx->type->offsets[member] = dec->literals[0]; - break; - case SpvDecorationMatrixStride: - mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; - break; - case SpvDecorationColMajor: - break; /* Nothing to do here. Column-major is the default. 
*/ - case SpvDecorationRowMajor: - mutable_matrix_member(b, ctx->type, member)->row_major = true; - break; - default: - unreachable("Unhandled member decoration"); - } -} - -static void -type_decoration_cb(struct vtn_builder *b, - struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *ctx) -{ - struct vtn_type *type = val->type; - - if (member != -1) - return; - - switch (dec->decoration) { - case SpvDecorationArrayStride: - type->stride = dec->literals[0]; - break; - case SpvDecorationBlock: - type->block = true; - break; - case SpvDecorationBufferBlock: - type->buffer_block = true; - break; - case SpvDecorationGLSLShared: - case SpvDecorationGLSLPacked: - /* Ignore these, since we get explicit offsets anyways */ - break; - - case SpvDecorationStream: - assert(dec->literals[0] == 0); - break; - - default: - unreachable("Unhandled type decoration"); - } -} - -static unsigned -translate_image_format(SpvImageFormat format) -{ - switch (format) { - case SpvImageFormatUnknown: return 0; /* GL_NONE */ - case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ - case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ - case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ - case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ - case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ - case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ - case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ - case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ - case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ - case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ - case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ - case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ - case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ - case SpvImageFormatR16: return 0x822A; /* GL_R16 */ - case SpvImageFormatR8: return 0x8229; /* GL_R8 */ - case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ - case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ - case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ - case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ - case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ - case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ - case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ - case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ - case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ - case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ - case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ - case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ - case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ - case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ - case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ - case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ - case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ - case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ - case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ - case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ - case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ - case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ - case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ - case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ - default: - assert(!"Invalid image format"); - return 0; - } -} - -static void -vtn_handle_type(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned 
count) -{ - struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); - - val->type = rzalloc(b, struct vtn_type); - val->type->is_builtin = false; - val->type->val = val; - - switch (opcode) { - case SpvOpTypeVoid: - val->type->type = glsl_void_type(); - break; - case SpvOpTypeBool: - val->type->type = glsl_bool_type(); - break; - case SpvOpTypeInt: { - const bool signedness = w[3]; - val->type->type = (signedness ? glsl_int_type() : glsl_uint_type()); - break; - } - case SpvOpTypeFloat: - val->type->type = glsl_float_type(); - break; - - case SpvOpTypeVector: { - struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; - unsigned elems = w[3]; - - assert(glsl_type_is_scalar(base->type)); - val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); - - /* Vectors implicitly have sizeof(base_type) stride. For now, this - * is always 4 bytes. This will have to change if we want to start - * supporting doubles or half-floats. - */ - val->type->stride = 4; - val->type->array_element = base; - break; - } - - case SpvOpTypeMatrix: { - struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; - unsigned columns = w[3]; - - assert(glsl_type_is_vector(base->type)); - val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), - glsl_get_vector_elements(base->type), - columns); - assert(!glsl_type_is_error(val->type->type)); - val->type->array_element = base; - val->type->row_major = false; - val->type->stride = 0; - break; - } - - case SpvOpTypeRuntimeArray: - case SpvOpTypeArray: { - struct vtn_type *array_element = - vtn_value(b, w[2], vtn_value_type_type)->type; - - unsigned length; - if (opcode == SpvOpTypeRuntimeArray) { - /* A length of 0 is used to denote unsized arrays */ - length = 0; - } else { - length = - vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; - } - - val->type->type = glsl_array_type(array_element->type, length); - val->type->array_element = array_element; - val->type->stride = 0; - break; - } - - case SpvOpTypeStruct: { - unsigned num_fields = count - 2; - val->type->members = ralloc_array(b, struct vtn_type *, num_fields); - val->type->offsets = ralloc_array(b, unsigned, num_fields); - - NIR_VLA(struct glsl_struct_field, fields, count); - for (unsigned i = 0; i < num_fields; i++) { - val->type->members[i] = - vtn_value(b, w[i + 2], vtn_value_type_type)->type; - fields[i] = (struct glsl_struct_field) { - .type = val->type->members[i]->type, - .name = ralloc_asprintf(b, "field%d", i), - .location = -1, - }; - } - - struct member_decoration_ctx ctx = { - .num_fields = num_fields, - .fields = fields, - .type = val->type - }; - - vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); - - const char *name = val->name ? val->name : "struct"; - - val->type->type = glsl_struct_type(fields, num_fields, name); - break; - } - - case SpvOpTypeFunction: { - const struct glsl_type *return_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - NIR_VLA(struct glsl_function_param, params, count - 3); - for (unsigned i = 0; i < count - 3; i++) { - params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; - - /* FIXME: */ - params[i].in = true; - params[i].out = true; - } - val->type->type = glsl_function_type(return_type, params, count - 3); - break; - } - - case SpvOpTypePointer: - /* FIXME: For now, we'll just do the really lame thing and return - * the same type. 
The validator should ensure that the proper number - * of dereferences happen - */ - val->type = vtn_value(b, w[3], vtn_value_type_type)->type; - break; - - case SpvOpTypeImage: { - const struct glsl_type *sampled_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; - - assert(glsl_type_is_vector_or_scalar(sampled_type)); - - enum glsl_sampler_dim dim; - switch ((SpvDim)w[3]) { - case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; - case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; - case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; - case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; - case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; - case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; - default: - unreachable("Invalid SPIR-V Sampler dimension"); - } - - bool is_shadow = w[4]; - bool is_array = w[5]; - bool multisampled = w[6]; - unsigned sampled = w[7]; - SpvImageFormat format = w[8]; - - if (count > 9) - val->type->access_qualifier = w[9]; - else - val->type->access_qualifier = SpvAccessQualifierReadWrite; - - if (multisampled) { - assert(dim == GLSL_SAMPLER_DIM_2D); - dim = GLSL_SAMPLER_DIM_MS; - } - - val->type->image_format = translate_image_format(format); - - if (sampled == 1) { - val->type->type = glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); - } else if (sampled == 2) { - assert(format); - assert(!is_shadow); - val->type->type = glsl_image_type(dim, is_array, - glsl_get_base_type(sampled_type)); - } else { - assert(!"We need to know if the image will be sampled"); - } - break; - } - - case SpvOpTypeSampledImage: - val->type = vtn_value(b, w[2], vtn_value_type_type)->type; - break; - - case SpvOpTypeSampler: - /* The actual sampler type here doesn't really matter. It gets - * thrown away the moment you combine it with an image. What really - * matters is that it's a sampler type as opposed to an integer type - * so the backend knows what to do. - */ - val->type->type = glsl_bare_sampler_type(); - break; - - case SpvOpTypeOpaque: - case SpvOpTypeEvent: - case SpvOpTypeDeviceEvent: - case SpvOpTypeReserveId: - case SpvOpTypeQueue: - case SpvOpTypePipe: - default: - unreachable("Unhandled opcode"); - } - - vtn_foreach_decoration(b, val, type_decoration_cb, NULL); -} - -static nir_constant * -vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) -{ - nir_constant *c = rzalloc(b, nir_constant); - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - /* Nothing to do here. 
It's already initialized to zero */ - break; - - case GLSL_TYPE_ARRAY: - assert(glsl_get_length(type) > 0); - c->num_elements = glsl_get_length(type); - c->elements = ralloc_array(b, nir_constant *, c->num_elements); - - c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); - for (unsigned i = 1; i < c->num_elements; i++) - c->elements[i] = c->elements[0]; - break; - - case GLSL_TYPE_STRUCT: - c->num_elements = glsl_get_length(type); - c->elements = ralloc_array(b, nir_constant *, c->num_elements); - - for (unsigned i = 0; i < c->num_elements; i++) { - c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); - } - break; - - default: - unreachable("Invalid type for null constant"); - } - - return c; -} - -static void -spec_constant_decoration_cb(struct vtn_builder *b, struct vtn_value *v, - int member, const struct vtn_decoration *dec, - void *data) -{ - assert(member == -1); - if (dec->decoration != SpvDecorationSpecId) - return; - - uint32_t *const_value = data; - - for (unsigned i = 0; i < b->num_specializations; i++) { - if (b->specializations[i].id == dec->literals[0]) { - *const_value = b->specializations[i].data; - return; - } - } -} - -static uint32_t -get_specialization(struct vtn_builder *b, struct vtn_value *val, - uint32_t const_value) -{ - vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &const_value); - return const_value; -} - -static void -vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); - val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->constant = rzalloc(b, nir_constant); - switch (opcode) { - case SpvOpConstantTrue: - assert(val->const_type == glsl_bool_type()); - val->constant->value.u[0] = NIR_TRUE; - break; - case SpvOpConstantFalse: - assert(val->const_type == glsl_bool_type()); - val->constant->value.u[0] = NIR_FALSE; - break; - - case SpvOpSpecConstantTrue: - case SpvOpSpecConstantFalse: { - assert(val->const_type == glsl_bool_type()); - uint32_t int_val = - get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); - val->constant->value.u[0] = int_val ?
NIR_TRUE : NIR_FALSE; - break; - } - - case SpvOpConstant: - assert(glsl_type_is_scalar(val->const_type)); - val->constant->value.u[0] = w[3]; - break; - case SpvOpSpecConstant: - assert(glsl_type_is_scalar(val->const_type)); - val->constant->value.u[0] = get_specialization(b, val, w[3]); - break; - case SpvOpSpecConstantComposite: - case SpvOpConstantComposite: { - unsigned elem_count = count - 3; - nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); - for (unsigned i = 0; i < elem_count; i++) - elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; - - switch (glsl_get_base_type(val->const_type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(val->const_type)) { - unsigned rows = glsl_get_vector_elements(val->const_type); - assert(glsl_get_matrix_columns(val->const_type) == elem_count); - for (unsigned i = 0; i < elem_count; i++) - for (unsigned j = 0; j < rows; j++) - val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; - } else { - assert(glsl_type_is_vector(val->const_type)); - assert(glsl_get_vector_elements(val->const_type) == elem_count); - for (unsigned i = 0; i < elem_count; i++) - val->constant->value.u[i] = elems[i]->value.u[0]; - } - ralloc_free(elems); - break; - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - ralloc_steal(val->constant, elems); - val->constant->num_elements = elem_count; - val->constant->elements = elems; - break; - - default: - unreachable("Unsupported type for constants"); - } - break; - } - - case SpvOpSpecConstantOp: { - SpvOp opcode = get_specialization(b, val, w[3]); - switch (opcode) { - case SpvOpVectorShuffle: { - struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); - struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); - unsigned len0 = glsl_get_vector_elements(v0->const_type); - unsigned len1 = glsl_get_vector_elements(v1->const_type); - - uint32_t u[8]; - for (unsigned i = 0; i < len0; i++) - u[i] = v0->constant->value.u[i]; - for (unsigned i = 0; i < len1; i++) - u[len0 + i] = v1->constant->value.u[i]; - - for (unsigned i = 0; i < count - 6; i++) { - uint32_t comp = w[i + 6]; - if (comp == (uint32_t)-1) { - val->constant->value.u[i] = 0xdeadbeef; - } else { - val->constant->value.u[i] = u[comp]; - } - } - return; - } - - case SpvOpCompositeExtract: - case SpvOpCompositeInsert: { - struct vtn_value *comp; - unsigned deref_start; - struct nir_constant **c; - if (opcode == SpvOpCompositeExtract) { - comp = vtn_value(b, w[4], vtn_value_type_constant); - deref_start = 5; - c = &comp->constant; - } else { - comp = vtn_value(b, w[5], vtn_value_type_constant); - deref_start = 6; - val->constant = nir_constant_clone(comp->constant, - (nir_variable *)b); - c = &val->constant; - } - - int elem = -1; - const struct glsl_type *type = comp->const_type; - for (unsigned i = deref_start; i < count; i++) { - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* If we hit this granularity, we're picking off an element */ - if (elem < 0) - elem = 0; - - if (glsl_type_is_matrix(type)) { - elem += w[i] * glsl_get_vector_elements(type); - type = glsl_get_column_type(type); - } else { - assert(glsl_type_is_vector(type)); - elem += w[i]; - type = glsl_scalar_type(glsl_get_base_type(type)); - } - continue; - - case GLSL_TYPE_ARRAY: - c = &(*c)->elements[w[i]]; - type = glsl_get_array_element(type); - continue; - - case GLSL_TYPE_STRUCT: - c = 
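/* Editor's aside: in the OpVectorShuffle case above, a component index
 * of 0xffffffff is SPIR-V's "undefined component"; the code fills such
 * lanes with the arbitrary marker 0xdeadbeef rather than reading
 * uninitialized data. */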
&(*c)->elements[w[i]]; - type = glsl_get_struct_field(type, w[i]); - continue; - - default: - unreachable("Invalid constant type"); - } - } - - if (opcode == SpvOpCompositeExtract) { - if (elem == -1) { - val->constant = *c; - } else { - unsigned num_components = glsl_get_vector_elements(type); - for (unsigned i = 0; i < num_components; i++) - val->constant->value.u[i] = (*c)->value.u[elem + i]; - } - } else { - struct vtn_value *insert = - vtn_value(b, w[4], vtn_value_type_constant); - assert(insert->const_type == type); - if (elem == -1) { - *c = insert->constant; - } else { - unsigned num_components = glsl_get_vector_elements(type); - for (unsigned i = 0; i < num_components; i++) - (*c)->value.u[elem + i] = insert->constant->value.u[i]; - } - } - return; - } - - default: { - bool swap; - nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); - - unsigned num_components = glsl_get_vector_elements(val->const_type); - unsigned bit_size = - glsl_get_bit_size(glsl_get_base_type(val->const_type)); - - nir_const_value src[3]; - assert(count <= 7); - for (unsigned i = 0; i < count - 4; i++) { - nir_constant *c = - vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; - - unsigned j = swap ? 1 - i : i; - assert(bit_size == 32); - for (unsigned k = 0; k < num_components; k++) - src[j].u32[k] = c->value.u[k]; - } - - nir_const_value res = nir_eval_const_opcode(op, num_components, - bit_size, src); - - for (unsigned k = 0; k < num_components; k++) - val->constant->value.u[k] = res.u32[k]; - - return; - } /* default */ - } - } - - case SpvOpConstantNull: - val->constant = vtn_null_constant(b, val->const_type); - break; - - case SpvOpConstantSampler: - assert(!"OpConstantSampler requires Kernel Capability"); - break; - - default: - unreachable("Unhandled opcode"); - } -} - -static void -vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct nir_function *callee = - vtn_value(b, w[3], vtn_value_type_function)->func->impl->function; - - nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); - for (unsigned i = 0; i < call->num_params; i++) { - unsigned arg_id = w[4 + i]; - struct vtn_value *arg = vtn_untyped_value(b, arg_id); - if (arg->value_type == vtn_value_type_access_chain) { - nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); - call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); - } else { - struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); - - /* Make a temporary to store the argument in */ - nir_variable *tmp = - nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); - call->params[i] = nir_deref_var_create(call, tmp); - - vtn_local_store(b, arg_ssa, call->params[i]); - } - } - - nir_variable *out_tmp = NULL; - if (!glsl_type_is_void(callee->return_type)) { - out_tmp = nir_local_variable_create(b->impl, callee->return_type, - "out_tmp"); - call->return_deref = nir_deref_var_create(call, out_tmp); - } - - nir_builder_instr_insert(&b->nb, &call->instr); - - if (glsl_type_is_void(callee->return_type)) { - vtn_push_value(b, w[2], vtn_value_type_undef); - } else { - struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); - retval->ssa = vtn_local_load(b, call->return_deref); - } -} - -struct vtn_ssa_value * -vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) -{ - struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); - val->type = type; - - if (!glsl_type_is_vector_or_scalar(type)) { - unsigned elems = glsl_get_length(type); - 
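/* Editor's aside (illustrative sketch, not from the patch): the default
 * case of SpvOpSpecConstantOp above folds the operation through NIR's
 * constant evaluator.  For a scalar 32-bit OpIAdd it amounts to:
 *
 *   nir_const_value src[2];
 *   src[0].u32[0] = 3;   (first specialized operand)
 *   src[1].u32[0] = 4;   (second specialized operand)
 *   nir_const_value res = nir_eval_const_opcode(nir_op_iadd, 1, 32, src);
 *   res.u32[0] is then 7.
 */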
val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) { - const struct glsl_type *child_type; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - child_type = glsl_get_column_type(type); - break; - case GLSL_TYPE_ARRAY: - child_type = glsl_get_array_element(type); - break; - case GLSL_TYPE_STRUCT: - child_type = glsl_get_struct_field(type, i); - break; - default: - unreachable("unknown base type"); - } - - val->elems[i] = vtn_create_ssa_value(b, child_type); - } - } - - return val; -} - -static nir_tex_src -vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) -{ - nir_tex_src src; - src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); - src.src_type = type; - return src; -} - -static void -vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpSampledImage) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - val->sampled_image->sampler = - vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; - return; - } else if (opcode == SpvOpImage) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - struct vtn_value *src_val = vtn_untyped_value(b, w[3]); - if (src_val->value_type == vtn_value_type_sampled_image) { - val->access_chain = src_val->sampled_image->image; - } else { - assert(src_val->value_type == vtn_value_type_access_chain); - val->access_chain = src_val->access_chain; - } - return; - } - - struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - struct vtn_sampled_image sampled; - struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); - if (sampled_val->value_type == vtn_value_type_sampled_image) { - sampled = *sampled_val->sampled_image; - } else { - assert(sampled_val->value_type == vtn_value_type_access_chain); - sampled.image = NULL; - sampled.sampler = sampled_val->access_chain; - } - - const struct glsl_type *image_type; - if (sampled.image) { - image_type = sampled.image->var->var->interface_type; - } else { - image_type = sampled.sampler->var->var->interface_type; - } - - nir_tex_src srcs[8]; /* 8 should be enough */ - nir_tex_src *p = srcs; - - unsigned idx = 4; - - bool has_coord = false; - switch (opcode) { - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - case SpvOpImageFetch: - case SpvOpImageGather: - case SpvOpImageDrefGather: - case SpvOpImageQueryLod: { - /* All these types have the coordinate as their first real argument */ - struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); - has_coord = true; - p->src = nir_src_for_ssa(coord->def); - p->src_type = nir_tex_src_coord; - p++; - break; - } - - default: - break; - } - - /* These all have an explicit depth value as their next source */ - switch (opcode) { - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case
SpvOpImageSampleProjDrefExplicitLod: - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); - break; - default: - break; - } - - /* For OpImageQuerySizeLod, we always have an LOD */ - if (opcode == SpvOpImageQuerySizeLod) - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); - - /* Figure out the base texture operation */ - nir_texop texop; - switch (opcode) { - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - texop = nir_texop_tex; - break; - - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - texop = nir_texop_txl; - break; - - case SpvOpImageFetch: - if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) { - texop = nir_texop_txf_ms; - } else { - texop = nir_texop_txf; - } - break; - - case SpvOpImageGather: - case SpvOpImageDrefGather: - texop = nir_texop_tg4; - break; - - case SpvOpImageQuerySizeLod: - case SpvOpImageQuerySize: - texop = nir_texop_txs; - break; - - case SpvOpImageQueryLod: - texop = nir_texop_lod; - break; - - case SpvOpImageQueryLevels: - texop = nir_texop_query_levels; - break; - - case SpvOpImageQuerySamples: - default: - unreachable("Unhandled opcode"); - } - - /* Now we need to handle some number of optional arguments */ - if (idx < count) { - uint32_t operands = w[idx++]; - - if (operands & SpvImageOperandsBiasMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txb; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); - } - - if (operands & SpvImageOperandsLodMask) { - assert(texop == nir_texop_txl || texop == nir_texop_txf || - texop == nir_texop_txf_ms || texop == nir_texop_txs); - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); - } - - if (operands & SpvImageOperandsGradMask) { - assert(texop == nir_texop_tex); - texop = nir_texop_txd; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); - } - - if (operands & SpvImageOperandsOffsetMask || - operands & SpvImageOperandsConstOffsetMask) - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); - - if (operands & SpvImageOperandsConstOffsetsMask) - assert(!"Constant offsets to texture gather not yet implemented"); - - if (operands & SpvImageOperandsSampleMask) { - assert(texop == nir_texop_txf_ms); - texop = nir_texop_txf_ms; - (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); - } - } - /* We should have now consumed exactly all of the arguments */ - assert(idx == count); - - nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); - instr->op = texop; - - memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); - - instr->sampler_dim = glsl_get_sampler_dim(image_type); - instr->is_array = glsl_sampler_type_is_array(image_type); - instr->is_shadow = glsl_sampler_type_is_shadow(image_type); - instr->is_new_style_shadow = instr->is_shadow; - - if (has_coord) { - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - instr->coord_components = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_MS: - instr->coord_components = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - instr->coord_components = 3; - break; - default: - assert(!"Invalid sampler type"); - } - - if (instr->is_array) - instr->coord_components++; - } else { - instr->coord_components = 0; - } - - switch
(glsl_get_sampler_result_type(image_type)) { - case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; - case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; - case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; - default: - unreachable("Invalid base type for sampler result"); - } - - nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); - if (sampled.image) { - nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); - instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref)); - } else { - instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); - } - - switch (instr->op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_txl: - case nir_texop_txd: - /* These operations require a sampler */ - instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); - break; - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_txs: - case nir_texop_lod: - case nir_texop_tg4: - case nir_texop_query_levels: - case nir_texop_texture_samples: - case nir_texop_samples_identical: - /* These don't */ - instr->sampler = NULL; - break; - } - - nir_ssa_dest_init(&instr->instr, &instr->dest, - nir_tex_instr_dest_size(instr), 32, NULL); - - assert(glsl_get_vector_elements(ret_type->type) == - nir_tex_instr_dest_size(instr)); - - val->ssa = vtn_create_ssa_value(b, ret_type->type); - val->ssa->def = &instr->dest.ssa; - - nir_builder_instr_insert(&b->nb, &instr->instr); -} - -static nir_ssa_def * -get_image_coord(struct vtn_builder *b, uint32_t value) -{ - struct vtn_ssa_value *coord = vtn_ssa_value(b, value); - - /* The image_load_store intrinsics assume a 4-dim coordinate */ - unsigned dim = glsl_get_vector_elements(coord->type); - unsigned swizzle[4]; - for (unsigned i = 0; i < 4; i++) - swizzle[i] = MIN2(i, dim - 1); - - return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); -} - -static void -vtn_handle_image(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - /* Just get this one out of the way */ - if (opcode == SpvOpImageTexelPointer) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_image_pointer); - val->image = ralloc(b, struct vtn_image_pointer); - - val->image->image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - val->image->coord = get_image_coord(b, w[4]); - val->image->sample = vtn_ssa_value(b, w[5])->def; - return; - } - - struct vtn_image_pointer image; - - switch (opcode) { - case SpvOpAtomicExchange: - case SpvOpAtomicCompareExchange: - case SpvOpAtomicCompareExchangeWeak: - case SpvOpAtomicIIncrement: - case SpvOpAtomicIDecrement: - case SpvOpAtomicIAdd: - case SpvOpAtomicISub: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; - break; - - case SpvOpImageQuerySize: - image.image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - image.coord = NULL; - image.sample = NULL; - break; - - case SpvOpImageRead: - image.image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - image.coord = get_image_coord(b, w[4]); - - if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { - assert(w[5] == SpvImageOperandsSampleMask); - image.sample = vtn_ssa_value(b, w[6])->def; - } else { - image.sample = nir_ssa_undef(&b->nb, 1, 32); - } - break; - 
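/* Editor's aside (illustrative): the MIN2-based swizzle in
 * get_image_coord above pads a short coordinate by repeating its last
 * valid component.  For a 2-D coordinate, dim == 2 gives
 * swizzle = {0, 1, 1, 1}, so (x, y) becomes the vec4 (x, y, y, y). */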
- case SpvOpImageWrite: - image.image = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; - image.coord = get_image_coord(b, w[2]); - - /* texel = w[3] */ - - if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { - assert(w[4] == SpvImageOperandsSampleMask); - image.sample = vtn_ssa_value(b, w[5])->def; - } else { - image.sample = nir_ssa_undef(&b->nb, 1, 32); - } - break; - - default: - unreachable("Invalid image opcode"); - } - - nir_intrinsic_op op; - switch (opcode) { -#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; - OP(ImageQuerySize, size) - OP(ImageRead, load) - OP(ImageWrite, store) - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_min) - OP(AtomicUMin, atomic_min) - OP(AtomicSMax, atomic_max) - OP(AtomicUMax, atomic_max) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid image opcode"); - } - - nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); - - nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); - intrin->variables[0] = - nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); - - /* ImageQuerySize doesn't take any extra parameters */ - if (opcode != SpvOpImageQuerySize) { - /* The image coordinate is always 4 components but we may not have that - * many. Swizzle to compensate. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < image.coord->num_components ? i : 0; - intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, - swiz, 4, false)); - intrin->src[1] = nir_src_for_ssa(image.sample); - } - - switch (opcode) { - case SpvOpImageQuerySize: - case SpvOpImageRead: - break; - case SpvOpImageWrite: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); - break; - case SpvOpAtomicIIncrement: - intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); - break; - case SpvOpAtomicIDecrement: - intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); - break; - - case SpvOpAtomicExchange: - case SpvOpAtomicIAdd: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - case SpvOpAtomicCompareExchange: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - case SpvOpAtomicISub: - intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); - break; - - default: - unreachable("Invalid image opcode"); - } - - if (opcode != SpvOpImageWrite) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL); - - nir_builder_instr_insert(&b->nb, &intrin->instr); - - /* The image intrinsics always return 4 channels but we may not want - * that many. Emit a mov to trim it down. 
- */ - unsigned swiz[4] = {0, 1, 2, 3}; - val->ssa = vtn_create_ssa_value(b, type->type); - val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, - glsl_get_vector_elements(type->type), false); - } else { - nir_builder_instr_insert(&b->nb, &intrin->instr); - } -} - -static nir_intrinsic_op -get_ssbo_nir_atomic_op(SpvOp opcode) -{ - switch (opcode) { -#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_imin) - OP(AtomicUMin, atomic_umin) - OP(AtomicSMax, atomic_imax) - OP(AtomicUMax, atomic_umax) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid SSBO atomic"); - } -} - -static nir_intrinsic_op -get_shared_nir_atomic_op(SpvOp opcode) -{ - switch (opcode) { -#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; - OP(AtomicExchange, atomic_exchange) - OP(AtomicCompareExchange, atomic_comp_swap) - OP(AtomicIIncrement, atomic_add) - OP(AtomicIDecrement, atomic_add) - OP(AtomicIAdd, atomic_add) - OP(AtomicISub, atomic_add) - OP(AtomicSMin, atomic_imin) - OP(AtomicUMin, atomic_umin) - OP(AtomicSMax, atomic_imax) - OP(AtomicUMax, atomic_umax) - OP(AtomicAnd, atomic_and) - OP(AtomicOr, atomic_or) - OP(AtomicXor, atomic_xor) -#undef OP - default: - unreachable("Invalid shared atomic"); - } -} - -static void -fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, nir_src *src) -{ - switch (opcode) { - case SpvOpAtomicIIncrement: - src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); - break; - - case SpvOpAtomicIDecrement: - src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); - break; - - case SpvOpAtomicISub: - src[0] = - nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); - break; - - case SpvOpAtomicCompareExchange: - src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); - break; - - case SpvOpAtomicExchange: - case SpvOpAtomicIAdd: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: - src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); - break; - - default: - unreachable("Invalid SPIR-V atomic"); - } -} - -static void -vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_access_chain *chain = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - nir_intrinsic_instr *atomic; - - /* - SpvScope scope = w[4]; - SpvMemorySemanticsMask semantics = w[5]; - */ - - if (chain->var->mode == vtn_variable_mode_workgroup) { - nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; - nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); - atomic = nir_intrinsic_instr_create(b->nb.shader, op); - atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); - fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); - } else { - assert(chain->var->mode == vtn_variable_mode_ssbo); - struct vtn_type *type; - nir_ssa_def *offset, *index; - offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); - - nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); - - atomic = nir_intrinsic_instr_create(b->nb.shader, op); - atomic->src[0] =
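/* Editor's aside (illustrative): fill_common_atomic_sources above maps
 * the SPIR-V atomics that have no direct NIR counterpart onto atomic_add:
 *   OpAtomicIIncrement -> atomic_add(ptr, 1)
 *   OpAtomicIDecrement -> atomic_add(ptr, -1)
 *   OpAtomicISub       -> atomic_add(ptr, -operand)
 * which is why all three share atomic_add in the OP() tables. */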
nir_src_for_ssa(index); - atomic->src[1] = nir_src_for_ssa(offset); - fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); - } - - nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->def = &atomic->dest.ssa; - val->ssa->type = type->type; - - nir_builder_instr_insert(&b->nb, &atomic->instr); -} - -static nir_alu_instr * -create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size) -{ - nir_op op; - switch (num_components) { - case 1: op = nir_op_fmov; break; - case 2: op = nir_op_vec2; break; - case 3: op = nir_op_vec3; break; - case 4: op = nir_op_vec4; break; - default: unreachable("bad vector size"); - } - - nir_alu_instr *vec = nir_alu_instr_create(shader, op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, - bit_size, NULL); - vec->dest.write_mask = (1 << num_components) - 1; - - return vec; -} - -struct vtn_ssa_value * -vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - if (src->transposed) - return src->transposed; - - struct vtn_ssa_value *dest = - vtn_create_ssa_value(b, glsl_transposed_type(src->type)); - - for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { - nir_alu_instr *vec = create_vec(b->shader, - glsl_get_matrix_columns(src->type), - glsl_get_bit_size(glsl_get_base_type(src->type))); - if (glsl_type_is_vector_or_scalar(src->type)) { - vec->src[0].src = nir_src_for_ssa(src->def); - vec->src[0].swizzle[0] = i; - } else { - for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { - vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); - vec->src[j].swizzle[0] = i; - } - } - nir_builder_instr_insert(&b->nb, &vec->instr); - dest->elems[i]->def = &vec->dest.dest.ssa; - } - - dest->transposed = src; - - return dest; -} - -nir_ssa_def * -vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) -{ - unsigned swiz[4] = { index }; - return nir_swizzle(&b->nb, src, swiz, 1, true); -} - -nir_ssa_def * -vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, - unsigned index) -{ - nir_alu_instr *vec = create_vec(b->shader, src->num_components, - src->bit_size); - - for (unsigned i = 0; i < src->num_components; i++) { - if (i == index) { - vec->src[i].src = nir_src_for_ssa(insert); - } else { - vec->src[i].src = nir_src_for_ssa(src); - vec->src[i].swizzle[0] = i; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -nir_ssa_def * -vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *index) -{ - nir_ssa_def *dest = vtn_vector_extract(b, src, 0); - for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), - vtn_vector_extract(b, src, i), dest); - - return dest; -} - -nir_ssa_def * -vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, nir_ssa_def *index) -{ - nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); - for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), - vtn_vector_insert(b, src, insert, i), dest); - - return dest; -} - -static nir_ssa_def * -vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, - nir_ssa_def *src0, nir_ssa_def *src1, - const uint32_t *indices) -{ - 
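/* Editor's aside (illustrative): vtn_vector_insert_dynamic above unrolls
 * a variable index into a chain of selects, since NIR has no indirect
 * vector addressing at this level.  For a vec3 it effectively emits:
 *
 *   dest = insert(src, v, 0);
 *   dest = bcsel(index == 1, insert(src, v, 1), dest);
 *   dest = bcsel(index == 2, insert(src, v, 2), dest);
 *
 * so exactly one component is replaced whatever the runtime index is. */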
nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size); - - for (unsigned i = 0; i < num_components; i++) { - uint32_t index = indices[i]; - if (index == 0xffffffff) { - vec->src[i].src = - nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size)); - } else if (index < src0->num_components) { - vec->src[i].src = nir_src_for_ssa(src0); - vec->src[i].swizzle[0] = index; - } else { - vec->src[i].src = nir_src_for_ssa(src1); - vec->src[i].swizzle[0] = index - src0->num_components; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -/* - * Concatenates a number of vectors/scalars together to produce a vector - */ -static nir_ssa_def * -vtn_vector_construct(struct vtn_builder *b, unsigned num_components, - unsigned num_srcs, nir_ssa_def **srcs) -{ - nir_alu_instr *vec = create_vec(b->shader, num_components, - srcs[0]->bit_size); - - unsigned dest_idx = 0; - for (unsigned i = 0; i < num_srcs; i++) { - nir_ssa_def *src = srcs[i]; - for (unsigned j = 0; j < src->num_components; j++) { - vec->src[dest_idx].src = nir_src_for_ssa(src); - vec->src[dest_idx].swizzle[0] = j; - dest_idx++; - } - } - - nir_builder_instr_insert(&b->nb, &vec->instr); - - return &vec->dest.dest.ssa; -} - -static struct vtn_ssa_value * -vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) -{ - struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); - dest->type = src->type; - - if (glsl_type_is_vector_or_scalar(src->type)) { - dest->def = src->def; - } else { - unsigned elems = glsl_get_length(src->type); - - dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) - dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); - } - - return dest; -} - -static struct vtn_ssa_value * -vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_ssa_value *insert, const uint32_t *indices, - unsigned num_indices) -{ - struct vtn_ssa_value *dest = vtn_composite_copy(b, src); - - struct vtn_ssa_value *cur = dest; - unsigned i; - for (i = 0; i < num_indices - 1; i++) { - cur = cur->elems[indices[i]]; - } - - if (glsl_type_is_vector_or_scalar(cur->type)) { - /* According to the SPIR-V spec, OpCompositeInsert may work down to - * the component granularity. In that case, the last index will be - * the index to insert the scalar into the vector. - */ - - cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); - } else { - cur->elems[indices[i]] = insert; - } - - return dest; -} - -static struct vtn_ssa_value * -vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, - const uint32_t *indices, unsigned num_indices) -{ - struct vtn_ssa_value *cur = src; - for (unsigned i = 0; i < num_indices; i++) { - if (glsl_type_is_vector_or_scalar(cur->type)) { - assert(i == num_indices - 1); - /* According to the SPIR-V spec, OpCompositeExtract may work down to - * the component granularity. The last index will be the index of the - * component to extract.
- */ - - struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); - ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); - ret->def = vtn_vector_extract(b, cur->def, indices[i]); - return ret; - } else { - cur = cur->elems[indices[i]]; - } - } - - return cur; -} - -static void -vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); - - switch (opcode) { - case SpvOpVectorExtractDynamic: - val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def); - break; - - case SpvOpVectorInsertDynamic: - val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - vtn_ssa_value(b, w[5])->def); - break; - - case SpvOpVectorShuffle: - val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), - vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - w + 5); - break; - - case SpvOpCompositeConstruct: { - unsigned elems = count - 3; - if (glsl_type_is_vector_or_scalar(type)) { - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < elems; i++) - srcs[i] = vtn_ssa_value(b, w[3 + i])->def; - val->ssa->def = - vtn_vector_construct(b, glsl_get_vector_elements(type), - elems, srcs); - } else { - val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); - for (unsigned i = 0; i < elems; i++) - val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); - } - break; - } - case SpvOpCompositeExtract: - val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), - w + 4, count - 4); - break; - - case SpvOpCompositeInsert: - val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), - vtn_ssa_value(b, w[3]), - w + 5, count - 5); - break; - - case SpvOpCopyObject: - val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); - break; - - default: - unreachable("unknown composite operation"); - } -} - -static void -vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - nir_intrinsic_op intrinsic_op; - switch (opcode) { - case SpvOpEmitVertex: - case SpvOpEmitStreamVertex: - intrinsic_op = nir_intrinsic_emit_vertex; - break; - case SpvOpEndPrimitive: - case SpvOpEndStreamPrimitive: - intrinsic_op = nir_intrinsic_end_primitive; - break; - case SpvOpMemoryBarrier: - intrinsic_op = nir_intrinsic_memory_barrier; - break; - case SpvOpControlBarrier: - intrinsic_op = nir_intrinsic_barrier; - break; - default: - unreachable("unknown barrier instruction"); - } - - nir_intrinsic_instr *intrin = - nir_intrinsic_instr_create(b->shader, intrinsic_op); - - if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) - nir_intrinsic_set_stream_id(intrin, w[1]); - - nir_builder_instr_insert(&b->nb, &intrin->instr); -} - -static unsigned -gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) -{ - switch (mode) { - case SpvExecutionModeInputPoints: - case SpvExecutionModeOutputPoints: - return 0; /* GL_POINTS */ - case SpvExecutionModeInputLines: - return 1; /* GL_LINES */ - case SpvExecutionModeInputLinesAdjacency: - return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ - case SpvExecutionModeTriangles: - return 4; /* GL_TRIANGLES */ - case SpvExecutionModeInputTrianglesAdjacency: - return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ - case SpvExecutionModeQuads: - return 7; /* GL_QUADS */ - case SpvExecutionModeIsolines: - 
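/* Editor's aside (illustrative): in vtn_composite_extract above, each
 * index walks one level of the aggregate.  Extracting with indices
 * (1, 2) from an array of vec4, for instance, first selects elems[1]
 * (a vec4 SSA value) and then, having reached vector granularity, uses
 * vtn_vector_extract to pull out component 2 as a scalar. */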
return 0x8E7A; /* GL_ISOLINES */ - case SpvExecutionModeOutputLineStrip: - return 3; /* GL_LINE_STRIP */ - case SpvExecutionModeOutputTriangleStrip: - return 5; /* GL_TRIANGLE_STRIP */ - default: - assert(!"Invalid primitive type"); - return 4; - } -} - -static unsigned -vertices_in_from_spv_execution_mode(SpvExecutionMode mode) -{ - switch (mode) { - case SpvExecutionModeInputPoints: - return 1; - case SpvExecutionModeInputLines: - return 2; - case SpvExecutionModeInputLinesAdjacency: - return 4; - case SpvExecutionModeTriangles: - return 3; - case SpvExecutionModeInputTrianglesAdjacency: - return 6; - default: - assert(!"Invalid GS input mode"); - return 0; - } -} - -static gl_shader_stage -stage_for_execution_model(SpvExecutionModel model) -{ - switch (model) { - case SpvExecutionModelVertex: - return MESA_SHADER_VERTEX; - case SpvExecutionModelTessellationControl: - return MESA_SHADER_TESS_CTRL; - case SpvExecutionModelTessellationEvaluation: - return MESA_SHADER_TESS_EVAL; - case SpvExecutionModelGeometry: - return MESA_SHADER_GEOMETRY; - case SpvExecutionModelFragment: - return MESA_SHADER_FRAGMENT; - case SpvExecutionModelGLCompute: - return MESA_SHADER_COMPUTE; - default: - unreachable("Unsupported execution model"); - } -} - -static bool -vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpSource: - case SpvOpSourceExtension: - case SpvOpSourceContinued: - case SpvOpExtension: - /* Unhandled, but these are for debug so that's ok. */ - break; - - case SpvOpCapability: { - SpvCapability cap = w[1]; - switch (cap) { - case SpvCapabilityMatrix: - case SpvCapabilityShader: - case SpvCapabilityGeometry: - case SpvCapabilityTessellationPointSize: - case SpvCapabilityGeometryPointSize: - case SpvCapabilityUniformBufferArrayDynamicIndexing: - case SpvCapabilitySampledImageArrayDynamicIndexing: - case SpvCapabilityStorageBufferArrayDynamicIndexing: - case SpvCapabilityStorageImageArrayDynamicIndexing: - case SpvCapabilityImageRect: - case SpvCapabilitySampledRect: - case SpvCapabilitySampled1D: - case SpvCapabilityImage1D: - case SpvCapabilitySampledCubeArray: - case SpvCapabilitySampledBuffer: - case SpvCapabilityImageBuffer: - case SpvCapabilityImageQuery: - break; - case SpvCapabilityClipDistance: - case SpvCapabilityCullDistance: - case SpvCapabilityGeometryStreams: - fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n"); - break; - default: - assert(!"Unsupported capability"); - } - break; - } - - case SpvOpExtInstImport: - vtn_handle_extension(b, opcode, w, count); - break; - - case SpvOpMemoryModel: - assert(w[1] == SpvAddressingModelLogical); - assert(w[2] == SpvMemoryModelGLSL450); - break; - - case SpvOpEntryPoint: { - struct vtn_value *entry_point = &b->values[w[2]]; - /* Let this be a name label regardless */ - unsigned name_words; - entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); - - if (strcmp(entry_point->name, b->entry_point_name) != 0 || - stage_for_execution_model(w[1]) != b->entry_point_stage) - break; - - assert(b->entry_point == NULL); - b->entry_point = entry_point; - break; - } - - case SpvOpString: - vtn_push_value(b, w[1], vtn_value_type_string)->str = - vtn_string_literal(b, &w[2], count - 2, NULL); - break; - - case SpvOpName: - b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); - break; - - case SpvOpMemberName: - /* TODO */ - break; - - case SpvOpExecutionMode: - case SpvOpDecorationGroup: - case SpvOpDecorate: - case 
SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - vtn_handle_decoration(b, opcode, w, count); - break; - - default: - return false; /* End of preamble */ - } - - return true; -} - -static void -vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, - const struct vtn_decoration *mode, void *data) -{ - assert(b->entry_point == entry_point); - - switch (mode->exec_mode) { - case SpvExecutionModeOriginUpperLeft: - case SpvExecutionModeOriginLowerLeft: - b->origin_upper_left = - (mode->exec_mode == SpvExecutionModeOriginUpperLeft); - break; - - case SpvExecutionModeEarlyFragmentTests: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.early_fragment_tests = true; - break; - - case SpvExecutionModeInvocations: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); - break; - - case SpvExecutionModeDepthReplacing: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; - break; - case SpvExecutionModeDepthGreater: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; - break; - case SpvExecutionModeDepthLess: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; - break; - case SpvExecutionModeDepthUnchanged: - assert(b->shader->stage == MESA_SHADER_FRAGMENT); - b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; - break; - - case SpvExecutionModeLocalSize: - assert(b->shader->stage == MESA_SHADER_COMPUTE); - b->shader->info.cs.local_size[0] = mode->literals[0]; - b->shader->info.cs.local_size[1] = mode->literals[1]; - b->shader->info.cs.local_size[2] = mode->literals[2]; - break; - case SpvExecutionModeLocalSizeHint: - break; /* Nothing to do with this */ - - case SpvExecutionModeOutputVertices: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.vertices_out = mode->literals[0]; - break; - - case SpvExecutionModeInputPoints: - case SpvExecutionModeInputLines: - case SpvExecutionModeInputLinesAdjacency: - case SpvExecutionModeTriangles: - case SpvExecutionModeInputTrianglesAdjacency: - case SpvExecutionModeQuads: - case SpvExecutionModeIsolines: - if (b->shader->stage == MESA_SHADER_GEOMETRY) { - b->shader->info.gs.vertices_in = - vertices_in_from_spv_execution_mode(mode->exec_mode); - } else { - assert(!"Tessellation shaders not yet supported"); - } - break; - - case SpvExecutionModeOutputPoints: - case SpvExecutionModeOutputLineStrip: - case SpvExecutionModeOutputTriangleStrip: - assert(b->shader->stage == MESA_SHADER_GEOMETRY); - b->shader->info.gs.output_primitive = - gl_primitive_from_spv_execution_mode(mode->exec_mode); - break; - - case SpvExecutionModeSpacingEqual: - case SpvExecutionModeSpacingFractionalEven: - case SpvExecutionModeSpacingFractionalOdd: - case SpvExecutionModeVertexOrderCw: - case SpvExecutionModeVertexOrderCcw: - case SpvExecutionModePointMode: - assert(!"TODO: Add tessellation metadata"); - break; - - case SpvExecutionModePixelCenterInteger: - case SpvExecutionModeXfb: - assert(!"Unhandled execution mode"); - break; - - case SpvExecutionModeVecTypeHint: - case SpvExecutionModeContractionOff: - break; /* OpenCL */ - } -} - -static bool -vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpSource: - case SpvOpSourceContinued: - case
SpvOpSourceExtension: - case SpvOpExtension: - case SpvOpCapability: - case SpvOpExtInstImport: - case SpvOpMemoryModel: - case SpvOpEntryPoint: - case SpvOpExecutionMode: - case SpvOpString: - case SpvOpName: - case SpvOpMemberName: - case SpvOpDecorationGroup: - case SpvOpDecorate: - case SpvOpMemberDecorate: - case SpvOpGroupDecorate: - case SpvOpGroupMemberDecorate: - assert(!"Invalid opcode types and variables section"); - break; - - case SpvOpTypeVoid: - case SpvOpTypeBool: - case SpvOpTypeInt: - case SpvOpTypeFloat: - case SpvOpTypeVector: - case SpvOpTypeMatrix: - case SpvOpTypeImage: - case SpvOpTypeSampler: - case SpvOpTypeSampledImage: - case SpvOpTypeArray: - case SpvOpTypeRuntimeArray: - case SpvOpTypeStruct: - case SpvOpTypeOpaque: - case SpvOpTypePointer: - case SpvOpTypeFunction: - case SpvOpTypeEvent: - case SpvOpTypeDeviceEvent: - case SpvOpTypeReserveId: - case SpvOpTypeQueue: - case SpvOpTypePipe: - vtn_handle_type(b, opcode, w, count); - break; - - case SpvOpConstantTrue: - case SpvOpConstantFalse: - case SpvOpConstant: - case SpvOpConstantComposite: - case SpvOpConstantSampler: - case SpvOpConstantNull: - case SpvOpSpecConstantTrue: - case SpvOpSpecConstantFalse: - case SpvOpSpecConstant: - case SpvOpSpecConstantComposite: - case SpvOpSpecConstantOp: - vtn_handle_constant(b, opcode, w, count); - break; - - case SpvOpVariable: - vtn_handle_variables(b, opcode, w, count); - break; - - default: - return false; /* End of preamble */ - } - - return true; -} - -static bool -vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpLabel: - break; - - case SpvOpLoopMerge: - case SpvOpSelectionMerge: - /* This is handled by cfg pre-pass and walk_blocks */ - break; - - case SpvOpUndef: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); - val->type = vtn_value(b, w[1], vtn_value_type_type)->type; - break; - } - - case SpvOpExtInst: - vtn_handle_extension(b, opcode, w, count); - break; - - case SpvOpVariable: - case SpvOpLoad: - case SpvOpStore: - case SpvOpCopyMemory: - case SpvOpCopyMemorySized: - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: - case SpvOpArrayLength: - vtn_handle_variables(b, opcode, w, count); - break; - - case SpvOpFunctionCall: - vtn_handle_function_call(b, opcode, w, count); - break; - - case SpvOpSampledImage: - case SpvOpImage: - case SpvOpImageSampleImplicitLod: - case SpvOpImageSampleExplicitLod: - case SpvOpImageSampleDrefImplicitLod: - case SpvOpImageSampleDrefExplicitLod: - case SpvOpImageSampleProjImplicitLod: - case SpvOpImageSampleProjExplicitLod: - case SpvOpImageSampleProjDrefImplicitLod: - case SpvOpImageSampleProjDrefExplicitLod: - case SpvOpImageFetch: - case SpvOpImageGather: - case SpvOpImageDrefGather: - case SpvOpImageQuerySizeLod: - case SpvOpImageQueryLod: - case SpvOpImageQueryLevels: - case SpvOpImageQuerySamples: - vtn_handle_texture(b, opcode, w, count); - break; - - case SpvOpImageRead: - case SpvOpImageWrite: - case SpvOpImageTexelPointer: - vtn_handle_image(b, opcode, w, count); - break; - - case SpvOpImageQuerySize: { - struct vtn_access_chain *image = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - if (glsl_type_is_image(image->var->var->interface_type)) { - vtn_handle_image(b, opcode, w, count); - } else { - vtn_handle_texture(b, opcode, w, count); - } - break; - } - - case SpvOpAtomicExchange: - case SpvOpAtomicCompareExchange: - case SpvOpAtomicCompareExchangeWeak: - case SpvOpAtomicIIncrement: 
- case SpvOpAtomicIDecrement: - case SpvOpAtomicIAdd: - case SpvOpAtomicISub: - case SpvOpAtomicSMin: - case SpvOpAtomicUMin: - case SpvOpAtomicSMax: - case SpvOpAtomicUMax: - case SpvOpAtomicAnd: - case SpvOpAtomicOr: - case SpvOpAtomicXor: { - struct vtn_value *pointer = vtn_untyped_value(b, w[3]); - if (pointer->value_type == vtn_value_type_image_pointer) { - vtn_handle_image(b, opcode, w, count); - } else { - assert(pointer->value_type == vtn_value_type_access_chain); - vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); - } - break; - } - - case SpvOpSNegate: - case SpvOpFNegate: - case SpvOpNot: - case SpvOpAny: - case SpvOpAll: - case SpvOpConvertFToU: - case SpvOpConvertFToS: - case SpvOpConvertSToF: - case SpvOpConvertUToF: - case SpvOpUConvert: - case SpvOpSConvert: - case SpvOpFConvert: - case SpvOpQuantizeToF16: - case SpvOpConvertPtrToU: - case SpvOpConvertUToPtr: - case SpvOpPtrCastToGeneric: - case SpvOpGenericCastToPtr: - case SpvOpBitcast: - case SpvOpIsNan: - case SpvOpIsInf: - case SpvOpIsFinite: - case SpvOpIsNormal: - case SpvOpSignBitSet: - case SpvOpLessOrGreater: - case SpvOpOrdered: - case SpvOpUnordered: - case SpvOpIAdd: - case SpvOpFAdd: - case SpvOpISub: - case SpvOpFSub: - case SpvOpIMul: - case SpvOpFMul: - case SpvOpUDiv: - case SpvOpSDiv: - case SpvOpFDiv: - case SpvOpUMod: - case SpvOpSRem: - case SpvOpSMod: - case SpvOpFRem: - case SpvOpFMod: - case SpvOpVectorTimesScalar: - case SpvOpDot: - case SpvOpIAddCarry: - case SpvOpISubBorrow: - case SpvOpUMulExtended: - case SpvOpSMulExtended: - case SpvOpShiftRightLogical: - case SpvOpShiftRightArithmetic: - case SpvOpShiftLeftLogical: - case SpvOpLogicalEqual: - case SpvOpLogicalNotEqual: - case SpvOpLogicalOr: - case SpvOpLogicalAnd: - case SpvOpLogicalNot: - case SpvOpBitwiseOr: - case SpvOpBitwiseXor: - case SpvOpBitwiseAnd: - case SpvOpSelect: - case SpvOpIEqual: - case SpvOpFOrdEqual: - case SpvOpFUnordEqual: - case SpvOpINotEqual: - case SpvOpFOrdNotEqual: - case SpvOpFUnordNotEqual: - case SpvOpULessThan: - case SpvOpSLessThan: - case SpvOpFOrdLessThan: - case SpvOpFUnordLessThan: - case SpvOpUGreaterThan: - case SpvOpSGreaterThan: - case SpvOpFOrdGreaterThan: - case SpvOpFUnordGreaterThan: - case SpvOpULessThanEqual: - case SpvOpSLessThanEqual: - case SpvOpFOrdLessThanEqual: - case SpvOpFUnordLessThanEqual: - case SpvOpUGreaterThanEqual: - case SpvOpSGreaterThanEqual: - case SpvOpFOrdGreaterThanEqual: - case SpvOpFUnordGreaterThanEqual: - case SpvOpDPdx: - case SpvOpDPdy: - case SpvOpFwidth: - case SpvOpDPdxFine: - case SpvOpDPdyFine: - case SpvOpFwidthFine: - case SpvOpDPdxCoarse: - case SpvOpDPdyCoarse: - case SpvOpFwidthCoarse: - case SpvOpBitFieldInsert: - case SpvOpBitFieldSExtract: - case SpvOpBitFieldUExtract: - case SpvOpBitReverse: - case SpvOpBitCount: - case SpvOpTranspose: - case SpvOpOuterProduct: - case SpvOpMatrixTimesScalar: - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - vtn_handle_alu(b, opcode, w, count); - break; - - case SpvOpVectorExtractDynamic: - case SpvOpVectorInsertDynamic: - case SpvOpVectorShuffle: - case SpvOpCompositeConstruct: - case SpvOpCompositeExtract: - case SpvOpCompositeInsert: - case SpvOpCopyObject: - vtn_handle_composite(b, opcode, w, count); - break; - - case SpvOpEmitVertex: - case SpvOpEndPrimitive: - case SpvOpEmitStreamVertex: - case SpvOpEndStreamPrimitive: - case SpvOpControlBarrier: - case SpvOpMemoryBarrier: - vtn_handle_barrier(b, opcode, w, count); - break; - - default: - unreachable("Unhandled 
opcode"); - } - - return true; -} - -nir_function * -spirv_to_nir(const uint32_t *words, size_t word_count, - struct nir_spirv_specialization *spec, unsigned num_spec, - gl_shader_stage stage, const char *entry_point_name, - const nir_shader_compiler_options *options) -{ - const uint32_t *word_end = words + word_count; - - /* Handle the SPIR-V header (first 4 dwords) */ - assert(word_count > 5); - - assert(words[0] == SpvMagicNumber); - assert(words[1] >= 0x10000); - /* words[2] == generator magic */ - unsigned value_id_bound = words[3]; - assert(words[4] == 0); - - words+= 5; - - /* Initialize the stn_builder object */ - struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); - b->value_id_bound = value_id_bound; - b->values = rzalloc_array(b, struct vtn_value, value_id_bound); - exec_list_make_empty(&b->functions); - b->entry_point_stage = stage; - b->entry_point_name = entry_point_name; - - /* Handle all the preamble instructions */ - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_preamble_instruction); - - if (b->entry_point == NULL) { - assert(!"Entry point not found"); - ralloc_free(b); - return NULL; - } - - b->shader = nir_shader_create(NULL, stage, options); - - /* Set shader info defaults */ - b->shader->info.gs.invocations = 1; - - /* Parse execution modes */ - vtn_foreach_execution_mode(b, b->entry_point, - vtn_handle_execution_mode, NULL); - - b->specializations = spec; - b->num_specializations = num_spec; - - /* Handle all variable, type, and constant instructions */ - words = vtn_foreach_instruction(b, words, word_end, - vtn_handle_variable_or_type_instruction); - - vtn_build_cfg(b, words, word_end); - - foreach_list_typed(struct vtn_function, func, node, &b->functions) { - b->impl = func->impl; - b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - vtn_function_emit(b, func, vtn_handle_body_instruction); - } - - assert(b->entry_point->value_type == vtn_value_type_function); - nir_function *entry_point = b->entry_point->func->impl->function; - assert(entry_point); - - ralloc_free(b); - - return entry_point; -} diff --git a/src/compiler/nir/spirv/vtn_alu.c b/src/compiler/nir/spirv/vtn_alu.c deleted file mode 100644 index 8b9a63ce760..00000000000 --- a/src/compiler/nir/spirv/vtn_alu.c +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "vtn_private.h" - -/* - * Normally, column vectors in SPIR-V correspond to a single NIR SSA - * definition. But for matrix multiplies, we want to do one routine for - * multiplying a matrix by a matrix and then pretend that vectors are matrices - * with one column. So we "wrap" these things, and unwrap the result before we - * send it off. - */ - -static struct vtn_ssa_value * -wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) -{ - if (val == NULL) - return NULL; - - if (glsl_type_is_matrix(val->type)) - return val; - - struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); - dest->type = val->type; - dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); - dest->elems[0] = val; - - return dest; -} - -static struct vtn_ssa_value * -unwrap_matrix(struct vtn_ssa_value *val) -{ - if (glsl_type_is_matrix(val->type)) - return val; - - return val->elems[0]; -} - -static struct vtn_ssa_value * -matrix_multiply(struct vtn_builder *b, - struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) -{ - - struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); - struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); - struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); - struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); - - unsigned src0_rows = glsl_get_vector_elements(src0->type); - unsigned src0_columns = glsl_get_matrix_columns(src0->type); - unsigned src1_columns = glsl_get_matrix_columns(src1->type); - - const struct glsl_type *dest_type; - if (src1_columns > 1) { - dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), - src0_rows, src1_columns); - } else { - dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); - } - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); - - dest = wrap_matrix(b, dest); - - bool transpose_result = false; - if (src0_transpose && src1_transpose) { - /* transpose(A) * transpose(B) = transpose(B * A) */ - src1 = src0_transpose; - src0 = src1_transpose; - src0_transpose = NULL; - src1_transpose = NULL; - transpose_result = true; - } - - if (src0_transpose && !src1_transpose && - glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { - /* We already have the rows of src0 and the columns of src1 available, - * so we can just take the dot product of each row with each column to - * get the result. - */ - - for (unsigned i = 0; i < src1_columns; i++) { - nir_ssa_def *vec_src[4]; - for (unsigned j = 0; j < src0_rows; j++) { - vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, - src1->elems[i]->def); - } - dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); - } - } else { - /* We don't handle the case where src1 is transposed but not src0, since - * the general case only uses individual components of src1 so the - * optimizer should chew through the transpose we emitted for src1. 
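The wrap_matrix()/unwrap_matrix() pair above lets one multiply routine serve both matrices and vectors: a column vector is simply treated as a matrix with a single column. On the CPU, the general (non-transposed) column-major loop that follows looks like this, with sizes fixed at 2x2 for brevity; mat2_mul is an illustrative helper, not driver code:

#include <stdio.h>

/* dest[i] = sum over j of src0[j] * src1[i][j], exactly the
 * accumulation matrix_multiply() emits with nir_fmul/nir_fadd.
 * Arrays are column-major: m[column][row].
 */
static void
mat2_mul(const float a[2][2], const float b[2][2], float dest[2][2])
{
   for (int i = 0; i < 2; i++) {       /* column i of b and of dest */
      for (int r = 0; r < 2; r++)
         dest[i][r] = a[0][r] * b[i][0];
      for (int j = 1; j < 2; j++)      /* accumulate remaining columns */
         for (int r = 0; r < 2; r++)
            dest[i][r] += a[j][r] * b[i][j];
   }
}

int main(void)
{
   const float id[2][2] = {{1, 0}, {0, 1}};
   const float m[2][2] = {{1, 2}, {3, 4}};
   float out[2][2];
   mat2_mul(id, m, out);
   /* identity times m gives m back; prints its rows: 1 3 / 2 4 */
   printf("%g %g / %g %g\n", out[0][0], out[1][0], out[0][1], out[1][1]);
   return 0;
}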
- */ - - for (unsigned i = 0; i < src1_columns; i++) { - /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ - dest->elems[i]->def = - nir_fmul(&b->nb, src0->elems[0]->def, - nir_channel(&b->nb, src1->elems[i]->def, 0)); - for (unsigned j = 1; j < src0_columns; j++) { - dest->elems[i]->def = - nir_fadd(&b->nb, dest->elems[i]->def, - nir_fmul(&b->nb, src0->elems[j]->def, - nir_channel(&b->nb, src1->elems[i]->def, j))); - } - } - } - - dest = unwrap_matrix(dest); - - if (transpose_result) - dest = vtn_ssa_transpose(b, dest); - - return dest; -} - -static struct vtn_ssa_value * -mat_times_scalar(struct vtn_builder *b, - struct vtn_ssa_value *mat, - nir_ssa_def *scalar) -{ - struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); - for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { - if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) - dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); - else - dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); - } - - return dest; -} - -static void -vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, - struct vtn_value *dest, - struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) -{ - switch (opcode) { - case SpvOpFNegate: { - dest->ssa = vtn_create_ssa_value(b, src0->type); - unsigned cols = glsl_get_matrix_columns(src0->type); - for (unsigned i = 0; i < cols; i++) - dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); - break; - } - - case SpvOpFAdd: { - dest->ssa = vtn_create_ssa_value(b, src0->type); - unsigned cols = glsl_get_matrix_columns(src0->type); - for (unsigned i = 0; i < cols; i++) - dest->ssa->elems[i]->def = - nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); - break; - } - - case SpvOpFSub: { - dest->ssa = vtn_create_ssa_value(b, src0->type); - unsigned cols = glsl_get_matrix_columns(src0->type); - for (unsigned i = 0; i < cols; i++) - dest->ssa->elems[i]->def = - nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); - break; - } - - case SpvOpTranspose: - dest->ssa = vtn_ssa_transpose(b, src0); - break; - - case SpvOpMatrixTimesScalar: - if (src0->transposed) { - dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, - src1->def)); - } else { - dest->ssa = mat_times_scalar(b, src0, src1->def); - } - break; - - case SpvOpVectorTimesMatrix: - case SpvOpMatrixTimesVector: - case SpvOpMatrixTimesMatrix: - if (opcode == SpvOpVectorTimesMatrix) { - dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); - } else { - dest->ssa = matrix_multiply(b, src0, src1); - } - break; - - default: unreachable("unknown matrix opcode"); - } -} - -nir_op -vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) -{ - /* Indicates that the first two arguments should be swapped. This is - * used for implementing greater-than and less-than-or-equal. 
 - */ - *swap = false; - - switch (opcode) { - case SpvOpSNegate: return nir_op_ineg; - case SpvOpFNegate: return nir_op_fneg; - case SpvOpNot: return nir_op_inot; - case SpvOpIAdd: return nir_op_iadd; - case SpvOpFAdd: return nir_op_fadd; - case SpvOpISub: return nir_op_isub; - case SpvOpFSub: return nir_op_fsub; - case SpvOpIMul: return nir_op_imul; - case SpvOpFMul: return nir_op_fmul; - case SpvOpUDiv: return nir_op_udiv; - case SpvOpSDiv: return nir_op_idiv; - case SpvOpFDiv: return nir_op_fdiv; - case SpvOpUMod: return nir_op_umod; - case SpvOpSMod: return nir_op_imod; - case SpvOpFMod: return nir_op_fmod; - case SpvOpSRem: return nir_op_irem; - case SpvOpFRem: return nir_op_frem; - - case SpvOpShiftRightLogical: return nir_op_ushr; - case SpvOpShiftRightArithmetic: return nir_op_ishr; - case SpvOpShiftLeftLogical: return nir_op_ishl; - case SpvOpLogicalOr: return nir_op_ior; - case SpvOpLogicalEqual: return nir_op_ieq; - case SpvOpLogicalNotEqual: return nir_op_ine; - case SpvOpLogicalAnd: return nir_op_iand; - case SpvOpLogicalNot: return nir_op_inot; - case SpvOpBitwiseOr: return nir_op_ior; - case SpvOpBitwiseXor: return nir_op_ixor; - case SpvOpBitwiseAnd: return nir_op_iand; - case SpvOpSelect: return nir_op_bcsel; - case SpvOpIEqual: return nir_op_ieq; - - case SpvOpBitFieldInsert: return nir_op_bitfield_insert; - case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; - case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; - case SpvOpBitReverse: return nir_op_bitfield_reverse; - case SpvOpBitCount: return nir_op_bit_count; - - /* Comparisons: (TODO: How do we want to handle ordered/unordered?) */ - case SpvOpFOrdEqual: return nir_op_feq; - case SpvOpFUnordEqual: return nir_op_feq; - case SpvOpINotEqual: return nir_op_ine; - case SpvOpFOrdNotEqual: return nir_op_fne; - case SpvOpFUnordNotEqual: return nir_op_fne; - case SpvOpULessThan: return nir_op_ult; - case SpvOpSLessThan: return nir_op_ilt; - case SpvOpFOrdLessThan: return nir_op_flt; - case SpvOpFUnordLessThan: return nir_op_flt; - case SpvOpUGreaterThan: *swap = true; return nir_op_ult; - case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; - case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; - case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; - case SpvOpULessThanEqual: *swap = true; return nir_op_uge; - case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; - case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; - case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; - case SpvOpUGreaterThanEqual: return nir_op_uge; - case SpvOpSGreaterThanEqual: return nir_op_ige; - case SpvOpFOrdGreaterThanEqual: return nir_op_fge; - case SpvOpFUnordGreaterThanEqual: return nir_op_fge; - - /* Conversions: */ - case SpvOpConvertFToU: return nir_op_f2u; - case SpvOpConvertFToS: return nir_op_f2i; - case SpvOpConvertSToF: return nir_op_i2f; - case SpvOpConvertUToF: return nir_op_u2f; - case SpvOpBitcast: return nir_op_imov; - case SpvOpUConvert: return nir_op_imov; - case SpvOpQuantizeToF16: return nir_op_fquantize2f16; - /* TODO: NIR is 32-bit only; these are no-ops.
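The *swap out-parameter in the comparison table relies on the identities a > b == b < a and a <= b == b >= a, so NIR only needs its less-than and greater-or-equal opcodes. A toy scalar version of what the caller does with the flag; ult and ugreater_than are illustrative stand-ins, not NIR builders:

#include <assert.h>

static int ult(unsigned a, unsigned b) { return a < b; }

/* SpvOpUGreaterThan maps to nir_op_ult with *swap = true, so the
 * caller evaluates ult with the operands exchanged. */
static int
ugreater_than(unsigned a, unsigned b)
{
   return ult(b, a);
}

int main(void)
{
   assert(ugreater_than(3, 2) && !ugreater_than(2, 3));
   return 0;
}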
*/ - case SpvOpSConvert: return nir_op_imov; - case SpvOpFConvert: return nir_op_fmov; - - /* Derivatives: */ - case SpvOpDPdx: return nir_op_fddx; - case SpvOpDPdy: return nir_op_fddy; - case SpvOpDPdxFine: return nir_op_fddx_fine; - case SpvOpDPdyFine: return nir_op_fddy_fine; - case SpvOpDPdxCoarse: return nir_op_fddx_coarse; - case SpvOpDPdyCoarse: return nir_op_fddy_coarse; - - default: - unreachable("No NIR equivalent"); - } -} - -static void -handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *_void) -{ - assert(dec->scope == VTN_DEC_DECORATION); - if (dec->decoration != SpvDecorationNoContraction) - return; - - b->nb.exact = true; -} - -void -vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - - vtn_foreach_decoration(b, val, handle_no_contraction, NULL); - - /* Collect the various SSA sources */ - const unsigned num_inputs = count - 3; - struct vtn_ssa_value *vtn_src[4] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) - vtn_src[i] = vtn_ssa_value(b, w[i + 3]); - - if (glsl_type_is_matrix(vtn_src[0]->type) || - (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { - vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); - b->nb.exact = false; - return; - } - - val->ssa = vtn_create_ssa_value(b, type); - nir_ssa_def *src[4] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) { - assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); - src[i] = vtn_src[i]->def; - } - - switch (opcode) { - case SpvOpAny: - if (src[0]->num_components == 1) { - val->ssa->def = nir_imov(&b->nb, src[0]); - } else { - nir_op op; - switch (src[0]->num_components) { - case 2: op = nir_op_bany_inequal2; break; - case 3: op = nir_op_bany_inequal3; break; - case 4: op = nir_op_bany_inequal4; break; - } - val->ssa->def = nir_build_alu(&b->nb, op, src[0], - nir_imm_int(&b->nb, NIR_FALSE), - NULL, NULL); - } - break; - - case SpvOpAll: - if (src[0]->num_components == 1) { - val->ssa->def = nir_imov(&b->nb, src[0]); - } else { - nir_op op; - switch (src[0]->num_components) { - case 2: op = nir_op_ball_iequal2; break; - case 3: op = nir_op_ball_iequal3; break; - case 4: op = nir_op_ball_iequal4; break; - } - val->ssa->def = nir_build_alu(&b->nb, op, src[0], - nir_imm_int(&b->nb, NIR_TRUE), - NULL, NULL); - } - break; - - case SpvOpOuterProduct: { - for (unsigned i = 0; i < src[1]->num_components; i++) { - val->ssa->elems[i]->def = - nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); - } - break; - } - - case SpvOpDot: - val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); - break; - - case SpvOpIAddCarry: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); - break; - - case SpvOpISubBorrow: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); - break; - - case SpvOpUMulExtended: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); - break; - - case SpvOpSMulExtended: - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = 
nir_imul(&b->nb, src[0], src[1]); - val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); - break; - - case SpvOpFwidth: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); - break; - case SpvOpFwidthFine: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); - break; - case SpvOpFwidthCoarse: - val->ssa->def = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); - break; - - case SpvOpVectorTimesScalar: - /* The builder will take care of splatting for us. */ - val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); - break; - - case SpvOpIsNan: - val->ssa->def = nir_fne(&b->nb, src[0], src[0]); - break; - - case SpvOpIsInf: - val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), - nir_imm_float(&b->nb, INFINITY)); - break; - - default: { - bool swap; - nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); - - if (swap) { - nir_ssa_def *tmp = src[0]; - src[0] = src[1]; - src[1] = tmp; - } - - val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); - break; - } /* default */ - } - - b->nb.exact = false; -} diff --git a/src/compiler/nir/spirv/vtn_cfg.c b/src/compiler/nir/spirv/vtn_cfg.c deleted file mode 100644 index 6a43ef8b2dd..00000000000 --- a/src/compiler/nir/spirv/vtn_cfg.c +++ /dev/null @@ -1,778 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
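The SpvOpIsNan and SpvOpIsInf lowerings above depend on two IEEE-754 facts: NaN is the only value that compares unequal to itself, and infinity is the only value whose absolute value equals INFINITY. The same identities hold in scalar C:

#include <assert.h>
#include <math.h>

static int is_nan(float x) { return x != x; }               /* nir_fne(x, x) */
static int is_inf(float x) { return fabsf(x) == INFINITY; } /* nir_feq(|x|, inf) */

int main(void)
{
   assert(is_nan(nanf("")) && !is_nan(1.0f));
   assert(is_inf(-INFINITY) && !is_inf(1.0f));
   return 0;
}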
- */ - -#include "vtn_private.h" -#include "nir/nir_vla.h" - -static bool -vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpFunction: { - assert(b->func == NULL); - b->func = rzalloc(b, struct vtn_function); - - list_inithead(&b->func->body); - b->func->control = w[3]; - - const struct glsl_type *result_type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); - val->func = b->func; - - const struct glsl_type *func_type = - vtn_value(b, w[4], vtn_value_type_type)->type->type; - - assert(glsl_get_function_return_type(func_type) == result_type); - - nir_function *func = - nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); - - func->num_params = glsl_get_length(func_type); - func->params = ralloc_array(b->shader, nir_parameter, func->num_params); - for (unsigned i = 0; i < func->num_params; i++) { - const struct glsl_function_param *param = - glsl_get_function_param(func_type, i); - func->params[i].type = param->type; - if (param->in) { - if (param->out) { - func->params[i].param_type = nir_parameter_inout; - } else { - func->params[i].param_type = nir_parameter_in; - } - } else { - if (param->out) { - func->params[i].param_type = nir_parameter_out; - } else { - assert(!"Parameter is neither in nor out"); - } - } - } - - func->return_type = glsl_get_function_return_type(func_type); - - b->func->impl = nir_function_impl_create(func); - - b->func_param_idx = 0; - break; - } - - case SpvOpFunctionEnd: - b->func->end = w; - b->func = NULL; - break; - - case SpvOpFunctionParameter: { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - - assert(b->func_param_idx < b->func->impl->num_params); - nir_variable *param = b->func->impl->params[b->func_param_idx++]; - - assert(param->type == type->type); - - /* Name the parameter so it shows up nicely in NIR */ - param->name = ralloc_strdup(param, val->name); - - struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); - vtn_var->type = type; - vtn_var->var = param; - vtn_var->chain.var = vtn_var; - vtn_var->chain.length = 0; - - struct vtn_type *without_array = type; - while(glsl_type_is_array(without_array->type)) - without_array = without_array->array_element; - - if (glsl_type_is_image(without_array->type)) { - vtn_var->mode = vtn_variable_mode_image; - param->interface_type = without_array->type; - } else if (glsl_type_is_sampler(without_array->type)) { - vtn_var->mode = vtn_variable_mode_sampler; - param->interface_type = without_array->type; - } else { - vtn_var->mode = vtn_variable_mode_param; - } - - val->access_chain = &vtn_var->chain; - break; - } - - case SpvOpLabel: { - assert(b->block == NULL); - b->block = rzalloc(b, struct vtn_block); - b->block->node.type = vtn_cf_node_type_block; - b->block->label = w; - vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; - - if (b->func->start_block == NULL) { - /* This is the first block encountered for this function. In this - * case, we set the start block and add it to the list of - * implemented functions that we'll walk later. 
- */ - b->func->start_block = b->block; - exec_list_push_tail(&b->functions, &b->func->node); - } - break; - } - - case SpvOpSelectionMerge: - case SpvOpLoopMerge: - assert(b->block && b->block->merge == NULL); - b->block->merge = w; - break; - - case SpvOpBranch: - case SpvOpBranchConditional: - case SpvOpSwitch: - case SpvOpKill: - case SpvOpReturn: - case SpvOpReturnValue: - case SpvOpUnreachable: - assert(b->block && b->block->branch == NULL); - b->block->branch = w; - b->block = NULL; - break; - - default: - /* Continue on as per normal */ - return true; - } - - return true; -} - -static void -vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, - struct vtn_block *break_block, - uint32_t block_id, uint32_t val, bool is_default) -{ - struct vtn_block *case_block = - vtn_value(b, block_id, vtn_value_type_block)->block; - - /* Don't create dummy cases that just break */ - if (case_block == break_block) - return; - - if (case_block->switch_case == NULL) { - struct vtn_case *c = ralloc(b, struct vtn_case); - - list_inithead(&c->body); - c->start_block = case_block; - c->fallthrough = NULL; - nir_array_init(&c->values, b); - c->is_default = false; - c->visited = false; - - list_addtail(&c->link, &swtch->cases); - - case_block->switch_case = c; - } - - if (is_default) { - case_block->switch_case->is_default = true; - } else { - nir_array_add(&case_block->switch_case->values, uint32_t, val); - } -} - -/* This function performs a depth-first search of the cases and puts them - * in fall-through order. - */ -static void -vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) -{ - if (cse->visited) - return; - - cse->visited = true; - - list_del(&cse->link); - - if (cse->fallthrough) { - vtn_order_case(swtch, cse->fallthrough); - - /* If we have a fall-through, place this case right before the case it - * falls through to. This ensures that fallthroughs come one after - * the other. These two can never get separated because that would - * imply something else falling through to the same case. Also, this - * can't break ordering because the DFS ensures that this case is - * visited before anything that falls through to it. 
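The effect of the depth-first ordering is easiest to see on a concrete set of cases. Below is a simplified model of vtn_order_case(); it assumes each fall-through chain is reached at its head first, whereas the real code's insert-before-target handles any visitation order, and emit_chain and its arrays are illustrative only:

#include <stdio.h>

#define NCASES 4

static void
emit_chain(int i, const int *fallthrough, int *visited, int *order, int *pos)
{
   if (visited[i])
      return;
   visited[i] = 1;
   order[(*pos)++] = i;        /* this case... */
   if (fallthrough[i] >= 0)    /* ...immediately followed by its target */
      emit_chain(fallthrough[i], fallthrough, visited, order, pos);
}

int main(void)
{
   /* case 1 falls through to case 3; the others break */
   const int fallthrough[NCASES] = { -1, 3, -1, -1 };
   int visited[NCASES] = { 0 }, order[NCASES], pos = 0;

   for (int i = 0; i < NCASES; i++)
      emit_chain(i, fallthrough, visited, order, &pos);

   for (int i = 0; i < pos; i++)
      printf("case %d\n", order[i]);   /* 0, 1, 3, 2 */
   return 0;
}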
- */ - list_addtail(&cse->link, &cse->fallthrough->link); - } else { - list_add(&cse->link, &swtch->cases); - } -} - -static enum vtn_branch_type -vtn_get_branch_type(struct vtn_block *block, - struct vtn_case *swcase, struct vtn_block *switch_break, - struct vtn_block *loop_break, struct vtn_block *loop_cont) -{ - if (block->switch_case) { - /* This branch is actually a fallthrough */ - assert(swcase->fallthrough == NULL || - swcase->fallthrough == block->switch_case); - swcase->fallthrough = block->switch_case; - return vtn_branch_type_switch_fallthrough; - } else if (block == switch_break) { - return vtn_branch_type_switch_break; - } else if (block == loop_break) { - return vtn_branch_type_loop_break; - } else if (block == loop_cont) { - return vtn_branch_type_loop_continue; - } else { - return vtn_branch_type_none; - } -} - -static void -vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, - struct vtn_block *start, struct vtn_case *switch_case, - struct vtn_block *switch_break, - struct vtn_block *loop_break, struct vtn_block *loop_cont, - struct vtn_block *end) -{ - struct vtn_block *block = start; - while (block != end) { - if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && - !block->loop) { - struct vtn_loop *loop = ralloc(b, struct vtn_loop); - - loop->node.type = vtn_cf_node_type_loop; - list_inithead(&loop->body); - list_inithead(&loop->cont_body); - loop->control = block->merge[3]; - - list_addtail(&loop->node.link, cf_list); - block->loop = loop; - - struct vtn_block *new_loop_break = - vtn_value(b, block->merge[1], vtn_value_type_block)->block; - struct vtn_block *new_loop_cont = - vtn_value(b, block->merge[2], vtn_value_type_block)->block; - - /* Note: This recursive call will start with the current block as - * its start block. If we weren't careful, we would get here - * again and end up in infinite recursion. This is why we set - * block->loop above and check for it before creating one. This - * way, we only create the loop once and the second call that - * tries to handle this loop goes to the cases below and gets - * handled as a regular block. - * - * Note: When we make the recursive walk calls, we pass NULL for - * the switch break since you have to break out of the loop first. - * We do, however, still pass the current switch case because it's - * possible that the merge block for the loop is the start of - * another case. 
 - */ - vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL, - new_loop_break, new_loop_cont, NULL); - vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL, - new_loop_break, NULL, block); - - block = new_loop_break; - continue; - } - - assert(block->node.link.next == NULL); - list_addtail(&block->node.link, cf_list); - - switch (*block->branch & SpvOpCodeMask) { - case SpvOpBranch: { - struct vtn_block *branch_block = - vtn_value(b, block->branch[1], vtn_value_type_block)->block; - - block->branch_type = vtn_get_branch_type(branch_block, - switch_case, switch_break, - loop_break, loop_cont); - - if (block->branch_type != vtn_branch_type_none) - return; - - block = branch_block; - continue; - } - - case SpvOpReturn: - case SpvOpReturnValue: - block->branch_type = vtn_branch_type_return; - return; - - case SpvOpKill: - block->branch_type = vtn_branch_type_discard; - return; - - case SpvOpBranchConditional: { - struct vtn_block *then_block = - vtn_value(b, block->branch[2], vtn_value_type_block)->block; - struct vtn_block *else_block = - vtn_value(b, block->branch[3], vtn_value_type_block)->block; - - struct vtn_if *if_stmt = ralloc(b, struct vtn_if); - - if_stmt->node.type = vtn_cf_node_type_if; - if_stmt->condition = block->branch[1]; - list_inithead(&if_stmt->then_body); - list_inithead(&if_stmt->else_body); - - list_addtail(&if_stmt->node.link, cf_list); - - if (block->merge && - (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) { - if_stmt->control = block->merge[2]; - } - - if_stmt->then_type = vtn_get_branch_type(then_block, - switch_case, switch_break, - loop_break, loop_cont); - if_stmt->else_type = vtn_get_branch_type(else_block, - switch_case, switch_break, - loop_break, loop_cont); - - if (if_stmt->then_type == vtn_branch_type_none && - if_stmt->else_type == vtn_branch_type_none) { - /* Neither side of the if is something we can short-circuit. */ - assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); - struct vtn_block *merge_block = - vtn_value(b, block->merge[1], vtn_value_type_block)->block; - - vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block, - switch_case, switch_break, - loop_break, loop_cont, merge_block); - vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block, - switch_case, switch_break, - loop_break, loop_cont, merge_block); - - enum vtn_branch_type merge_type = - vtn_get_branch_type(merge_block, switch_case, switch_break, - loop_break, loop_cont); - if (merge_type == vtn_branch_type_none) { - block = merge_block; - continue; - } else { - return; - } - } else if (if_stmt->then_type != vtn_branch_type_none && - if_stmt->else_type != vtn_branch_type_none) { - /* Both sides were short-circuited. We're done here. */ - return; - } else { - /* Exactly one side of the branch could be short-circuited. - * We set the branch up as a predicated break/continue and we - * continue on with the other side as if it were what comes - * after the if.
 - */ - if (if_stmt->then_type == vtn_branch_type_none) { - block = then_block; - } else { - block = else_block; - } - continue; - } - unreachable("Should have returned or continued"); - } - - case SpvOpSwitch: { - assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge); - struct vtn_block *break_block = - vtn_value(b, block->merge[1], vtn_value_type_block)->block; - - struct vtn_switch *swtch = ralloc(b, struct vtn_switch); - - swtch->node.type = vtn_cf_node_type_switch; - swtch->selector = block->branch[1]; - list_inithead(&swtch->cases); - - list_addtail(&swtch->node.link, cf_list); - - /* First, we go through and record all of the cases. */ - const uint32_t *branch_end = - block->branch + (block->branch[0] >> SpvWordCountShift); - - vtn_add_case(b, swtch, break_block, block->branch[2], 0, true); - for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2) - vtn_add_case(b, swtch, break_block, w[1], w[0], false); - - /* Now, we go through and walk the blocks. While we walk through - * the blocks, we also gather the much-needed fall-through - * information. - */ - list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) { - assert(cse->start_block != break_block); - vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse, - break_block, NULL, loop_cont, NULL); - } - - /* Finally, we walk over all of the cases one more time and put - * them in fall-through order. - */ - for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) { - struct vtn_block *case_block = - vtn_value(b, *w, vtn_value_type_block)->block; - - if (case_block == break_block) - continue; - - assert(case_block->switch_case); - - vtn_order_case(swtch, case_block->switch_case); - } - - block = break_block; - continue; - } - - case SpvOpUnreachable: - return; - - default: - unreachable("Unhandled opcode"); - } - } -} - -void -vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end) -{ - vtn_foreach_instruction(b, words, end, - vtn_cfg_handle_prepass_instruction); - - foreach_list_typed(struct vtn_function, func, node, &b->functions) { - vtn_cfg_walk_blocks(b, &func->body, func->start_block, - NULL, NULL, NULL, NULL, NULL); - } -} - -static bool -vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode == SpvOpLabel) - return true; /* Nothing to do */ - - /* If this isn't a phi node, stop. */ - if (opcode != SpvOpPhi) - return false; - - /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot. - * For each phi, we create a variable with the appropriate type and - * do a load from that variable. Then, in a second pass, we add - * stores to that variable to each of the predecessor blocks. - * - * We could do something more intelligent here. However, in order to - * handle loops and things properly, we really need dominance - * information. It would end up basically being the into-SSA - * algorithm all over again. It's easier if we just let - * lower_vars_to_ssa do that for us instead of repeating it here.
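A C analogue of the poor-man's out-of-SSA described above: the phi becomes a temporary variable that every predecessor block writes and the merge point reads; nir_lower_vars_to_ssa later turns the temporary back into real phi nodes.

#include <stdio.h>

int main(void)
{
   int cond = 1, a = 10, b = 20;
   int phi_tmp;         /* the variable created in the first pass */
   if (cond)
      phi_tmp = a;      /* store added to the 'then' predecessor */
   else
      phi_tmp = b;      /* store added to the 'else' predecessor */
   int x = phi_tmp;     /* the load emitted where OpPhi appeared */
   printf("%d\n", x);
   return 0;
}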
- */ - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_variable *phi_var = - nir_local_variable_create(b->nb.impl, type->type, "phi"); - _mesa_hash_table_insert(b->phi_table, w, phi_var); - - val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); - - return true; -} - -static bool -vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - if (opcode != SpvOpPhi) - return true; - - struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); - assert(phi_entry); - nir_variable *phi_var = phi_entry->data; - - for (unsigned i = 3; i < count; i += 2) { - struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); - struct vtn_block *pred = - vtn_value(b, w[i + 1], vtn_value_type_block)->block; - - b->nb.cursor = nir_after_block_before_jump(pred->end_block); - - vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); - } - - return true; -} - -static void -vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, - nir_variable *switch_fall_var, bool *has_switch_break) -{ - switch (branch_type) { - case vtn_branch_type_switch_break: - nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); - *has_switch_break = true; - break; - case vtn_branch_type_switch_fallthrough: - break; /* Nothing to do */ - case vtn_branch_type_loop_break: - nir_jump(&b->nb, nir_jump_break); - break; - case vtn_branch_type_loop_continue: - nir_jump(&b->nb, nir_jump_continue); - break; - case vtn_branch_type_return: - nir_jump(&b->nb, nir_jump_return); - break; - case vtn_branch_type_discard: { - nir_intrinsic_instr *discard = - nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); - nir_builder_instr_insert(&b->nb, &discard->instr); - break; - } - default: - unreachable("Invalid branch type"); - } -} - -static void -vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, - nir_variable *switch_fall_var, bool *has_switch_break, - vtn_instruction_handler handler) -{ - list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { - switch (node->type) { - case vtn_cf_node_type_block: { - struct vtn_block *block = (struct vtn_block *)node; - - const uint32_t *block_start = block->label; - const uint32_t *block_end = block->merge ? 
block->merge : - block->branch; - - block_start = vtn_foreach_instruction(b, block_start, block_end, - vtn_handle_phis_first_pass); - - vtn_foreach_instruction(b, block_start, block_end, handler); - - block->end_block = nir_cursor_current_block(b->nb.cursor); - - if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { - struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); - vtn_local_store(b, src, - nir_deref_var_create(b, b->impl->return_var)); - } - - if (block->branch_type != vtn_branch_type_none) { - vtn_emit_branch(b, block->branch_type, - switch_fall_var, has_switch_break); - } - - break; - } - - case vtn_cf_node_type_if: { - struct vtn_if *vtn_if = (struct vtn_if *)node; - - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = - nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def); - nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); - - bool sw_break = false; - - b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); - if (vtn_if->then_type == vtn_branch_type_none) { - vtn_emit_cf_list(b, &vtn_if->then_body, - switch_fall_var, &sw_break, handler); - } else { - vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break); - } - - b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); - if (vtn_if->else_type == vtn_branch_type_none) { - vtn_emit_cf_list(b, &vtn_if->else_body, - switch_fall_var, &sw_break, handler); - } else { - vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break); - } - - b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); - - /* If we encountered a switch break somewhere inside of the if, - * then it would have been handled correctly by calling - * emit_cf_list or emit_branch for the interior. However, we - * need to predicate everything following on whether or not we're - * still going. - */ - if (sw_break) { - *has_switch_break = true; - - nir_if *switch_if = nir_if_create(b->shader); - switch_if->condition = - nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var)); - nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node); - - b->nb.cursor = nir_after_cf_list(&switch_if->then_list); - } - break; - } - - case vtn_cf_node_type_loop: { - struct vtn_loop *vtn_loop = (struct vtn_loop *)node; - - nir_loop *loop = nir_loop_create(b->shader); - nir_cf_node_insert(b->nb.cursor, &loop->cf_node); - - b->nb.cursor = nir_after_cf_list(&loop->body); - vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler); - - if (!list_empty(&vtn_loop->cont_body)) { - /* If we have a non-trivial continue body then we need to put - * it at the beginning of the loop with a flag to ensure that - * it doesn't get executed in the first iteration.
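In C terms, the 'cont' flag emitted below works like this: the continue body sits at the top of the loop but is skipped on the very first trip through. A sketch only, not driver code:

#include <stdio.h>

int main(void)
{
   int i = 0;
   int do_cont = 0;                    /* stored false before the loop */
   while (i < 3) {
      if (do_cont)                     /* the cont_if guard */
         printf("continue body, i=%d\n", i);
      do_cont = 1;                     /* true for every later iteration */
      printf("loop body, i=%d\n", i);  /* the main body follows */
      i++;
   }
   return 0;
}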
- */ - nir_variable *do_cont = - nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); - - b->nb.cursor = nir_before_cf_node(&loop->cf_node); - nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); - - b->nb.cursor = nir_before_cf_list(&loop->body); - nir_if *cont_if = nir_if_create(b->shader); - cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); - nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); - - b->nb.cursor = nir_after_cf_list(&cont_if->then_list); - vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); - - b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); - nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); - - b->has_loop_continue = true; - } - - b->nb.cursor = nir_after_cf_node(&loop->cf_node); - break; - } - - case vtn_cf_node_type_switch: { - struct vtn_switch *vtn_switch = (struct vtn_switch *)node; - - /* First, we create a variable to keep track of whether or not the - * switch is still going at any given point. Any switch breaks - * will set this variable to false. - */ - nir_variable *fall_var = - nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); - nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); - - /* Next, we gather up all of the conditions. We have to do this - * up-front because we also need to build an "any" condition so - * that we can use !any for default. - */ - const int num_cases = list_length(&vtn_switch->cases); - NIR_VLA(nir_ssa_def *, conditions, num_cases); - - nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; - /* An accumulation of all conditions. Used for the default */ - nir_ssa_def *any = NULL; - - int i = 0; - list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { - if (cse->is_default) { - conditions[i++] = NULL; - continue; - } - - nir_ssa_def *cond = NULL; - nir_array_foreach(&cse->values, uint32_t, val) { - nir_ssa_def *is_val = - nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); - - cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; - } - - any = any ? 
nir_ior(&b->nb, any, cond) : cond; - conditions[i++] = cond; - } - assert(i == num_cases); - - /* Now we can walk the list of cases and actually emit code */ - i = 0; - list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { - /* Figure out the condition */ - nir_ssa_def *cond = conditions[i++]; - if (cse->is_default) { - assert(cond == NULL); - cond = nir_inot(&b->nb, any); - } - /* Take fallthrough into account */ - cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); - - nir_if *case_if = nir_if_create(b->nb.shader); - case_if->condition = nir_src_for_ssa(cond); - nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); - - bool has_break = false; - b->nb.cursor = nir_after_cf_list(&case_if->then_list); - nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); - vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); - (void)has_break; /* We don't care */ - - b->nb.cursor = nir_after_cf_node(&case_if->cf_node); - } - assert(i == num_cases); - - break; - } - - default: - unreachable("Invalid CF node type"); - } - } -} - -void -vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, - vtn_instruction_handler instruction_handler) -{ - nir_builder_init(&b->nb, func->impl); - b->nb.cursor = nir_after_cf_list(&func->impl->body); - b->has_loop_continue = false; - b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); - - vtn_foreach_instruction(b, func->start_block->label, func->end, - vtn_handle_phi_second_pass); - - /* Continue blocks for loops get inserted before the body of the loop - * but instructions in the continue may use SSA defs in the loop body. - * Therefore, we need to repair SSA to insert the needed phi nodes. - */ - if (b->has_loop_continue) - nir_repair_ssa_impl(func->impl); -} diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c deleted file mode 100644 index e05d28ffede..00000000000 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ /dev/null @@ -1,666 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
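Back in the switch lowering above, every case becomes an if whose condition also checks the 'fall' variable, which is how both fall-through and break work without a native switch construct. A C analogue with three cases, where case 0 falls through into case 1; a sketch only:

#include <stdio.h>

int main(void)
{
   int sel = 0;
   int fall = 0;                /* fall_var, stored false up front */
   if (sel == 0 || fall) {      /* case 0: no break, falls through */
      fall = 1;
      printf("case 0\n");
   }
   if (sel == 1 || fall) {      /* case 1: ends in a switch break */
      fall = 1;
      printf("case 1\n");
      fall = 0;                 /* vtn_branch_type_switch_break */
   }
   if (sel == 2 || fall) {      /* case 2: skipped for sel == 0 */
      fall = 1;
      printf("case 2\n");
      fall = 0;
   }
   return 0;                    /* prints "case 0" then "case 1" */
}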
 - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "vtn_private.h" -#include "GLSL.std.450.h" - -#define M_PIf ((float) M_PI) -#define M_PI_2f ((float) M_PI_2) -#define M_PI_4f ((float) M_PI_4) - -static nir_ssa_def * -build_mat2_det(nir_builder *b, nir_ssa_def *col[2]) -{ - unsigned swiz[4] = {1, 0, 0, 0}; - nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true)); - return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1)); -} - -static nir_ssa_def * -build_mat3_det(nir_builder *b, nir_ssa_def *col[3]) -{ - unsigned yzx[4] = {1, 2, 0, 0}; - unsigned zxy[4] = {2, 0, 1, 0}; - - nir_ssa_def *prod0 = - nir_fmul(b, col[0], - nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true), - nir_swizzle(b, col[2], zxy, 3, true))); - nir_ssa_def *prod1 = - nir_fmul(b, col[0], - nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true), - nir_swizzle(b, col[2], yzx, 3, true))); - - nir_ssa_def *diff = nir_fsub(b, prod0, prod1); - - return nir_fadd(b, nir_channel(b, diff, 0), - nir_fadd(b, nir_channel(b, diff, 1), - nir_channel(b, diff, 2))); -} - -static nir_ssa_def * -build_mat4_det(nir_builder *b, nir_ssa_def **col) -{ - nir_ssa_def *subdet[4]; - for (unsigned i = 0; i < 4; i++) { - unsigned swiz[3]; - for (unsigned j = 0; j < 3; j++) - swiz[j] = j + (j >= i); - - nir_ssa_def *subcol[3]; - subcol[0] = nir_swizzle(b, col[1], swiz, 3, true); - subcol[1] = nir_swizzle(b, col[2], swiz, 3, true); - subcol[2] = nir_swizzle(b, col[3], swiz, 3, true); - - subdet[i] = build_mat3_det(b, subcol); - } - - nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4)); - - return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0), - nir_channel(b, prod, 1)), - nir_fsub(b, nir_channel(b, prod, 2), - nir_channel(b, prod, 3))); -} - -static nir_ssa_def * -build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - unsigned size = glsl_get_vector_elements(src->type); - - nir_ssa_def *cols[4]; - for (unsigned i = 0; i < size; i++) - cols[i] = src->elems[i]->def; - - switch (size) { - case 2: return build_mat2_det(&b->nb, cols); - case 3: return build_mat3_det(&b->nb, cols); - case 4: return build_mat4_det(&b->nb, cols); - default: - unreachable("Invalid matrix size"); - } -} - -/* Computes the determinant of the submatrix given by taking src and - * removing the specified row and column.
- */ -static nir_ssa_def * -build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, - unsigned size, unsigned row, unsigned col) -{ - assert(row < size && col < size); - if (size == 2) { - return nir_channel(b, src->elems[1 - col]->def, 1 - row); - } else { - /* Swizzle to get all but the specified row */ - unsigned swiz[3]; - for (unsigned j = 0; j < 3; j++) - swiz[j] = j + (j >= row); - - /* Grab all but the specified column */ - nir_ssa_def *subcol[3]; - for (unsigned j = 0; j < size; j++) { - if (j != col) { - subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, - swiz, size - 1, true); - } - } - - if (size == 3) { - return build_mat2_det(b, subcol); - } else { - assert(size == 4); - return build_mat3_det(b, subcol); - } - } -} - -static struct vtn_ssa_value * -matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) -{ - nir_ssa_def *adj_col[4]; - unsigned size = glsl_get_vector_elements(src->type); - - /* Build up an adjugate matrix */ - for (unsigned c = 0; c < size; c++) { - nir_ssa_def *elem[4]; - for (unsigned r = 0; r < size; r++) { - elem[r] = build_mat_subdet(&b->nb, src, size, c, r); - - if ((r + c) % 2) - elem[r] = nir_fneg(&b->nb, elem[r]); - } - - adj_col[c] = nir_vec(&b->nb, elem, size); - } - - nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); - - struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); - for (unsigned i = 0; i < size; i++) - val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); - - return val; -} - -static nir_ssa_def* -build_length(nir_builder *b, nir_ssa_def *vec) -{ - switch (vec->num_components) { - case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); - case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); - case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); - case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); - default: - unreachable("Invalid number of components"); - } -} - -static inline nir_ssa_def * -build_fclamp(nir_builder *b, - nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) -{ - return nir_fmin(b, nir_fmax(b, x, min_val), max_val); -} - -/** - * Return e^x. - */ -static nir_ssa_def * -build_exp(nir_builder *b, nir_ssa_def *x) -{ - return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); -} - -/** - * Return ln(x) - the natural logarithm of x. - */ -static nir_ssa_def * -build_log(nir_builder *b, nir_ssa_def *x) -{ - return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); -} - -/** - * Approximate asin(x) by the formula: - * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1)))) - * - * which is correct to first order at x=0 and x=±1 regardless of the p - * coefficients but can be made second-order correct at both ends by selecting - * the fit coefficients appropriately. Different p coefficients can be used - * in the asin and acos implementation to minimize some relative error metric - * in each case. - */ -static nir_ssa_def * -build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1) -{ - nir_ssa_def *abs_x = nir_fabs(b, x); - return nir_fmul(b, nir_fsign(b, x), - nir_fsub(b, nir_imm_float(b, M_PI_2f), - nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), - nir_fadd(b, nir_imm_float(b, M_PI_2f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), - nir_fmul(b, abs_x, - nir_fadd(b, nir_imm_float(b, p0), - nir_fmul(b, abs_x, - nir_imm_float(b, p1)))))))))); -} - -/** - * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 
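For a 2x2 matrix, the adjugate-over-determinant construction used by matrix_inverse() above collapses to the familiar closed form. A CPU sketch in the same column-major convention; mat2_inverse is illustrative, not driver code:

#include <stdio.h>

static void
mat2_inverse(const float m[2][2], float out[2][2])
{
   /* det and the adjugate entries, with m[column][row] indexing */
   float det = m[0][0] * m[1][1] - m[1][0] * m[0][1];
   float inv_det = 1.0f / det;      /* nir_frcp() in the NIR version */
   out[0][0] =  m[1][1] * inv_det;
   out[0][1] = -m[0][1] * inv_det;
   out[1][0] = -m[1][0] * inv_det;
   out[1][1] =  m[0][0] * inv_det;
}

int main(void)
{
   const float m[2][2] = {{4, 2}, {7, 6}};  /* det = 10 */
   float inv[2][2];
   mat2_inverse(m, inv);
   /* first column of m * inverse(m) should print 1 0 */
   printf("%g %g\n", m[0][0] * inv[0][0] + m[1][0] * inv[0][1],
                     m[0][1] * inv[0][0] + m[1][1] * inv[0][1]);
   return 0;
}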
 - */ -static nir_ssa_def * -build_fsum(nir_builder *b, nir_ssa_def **xs, int terms) -{ - nir_ssa_def *accum = xs[0]; - - for (int i = 1; i < terms; i++) - accum = nir_fadd(b, accum, xs[i]); - - return accum; -} - -static nir_ssa_def * -build_atan(nir_builder *b, nir_ssa_def *y_over_x) -{ - nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); - nir_ssa_def *one = nir_imm_float(b, 1.0f); - - /* - * range-reduction, first step: - * - * / y_over_x if |y_over_x| <= 1.0; - * x = < - * \ 1.0 / y_over_x otherwise - */ - nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), - nir_fmax(b, abs_y_over_x, one)); - - /* - * approximate atan by evaluating polynomial: - * - * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + - * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + - * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 - */ - nir_ssa_def *x_2 = nir_fmul(b, x, x); - nir_ssa_def *x_3 = nir_fmul(b, x_2, x); - nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); - nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); - nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); - nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); - - nir_ssa_def *polynomial_terms[] = { - nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)), - nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)), - nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)), - nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)), - nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)), - nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)), - }; - - nir_ssa_def *tmp = - build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); - - /* range-reduction fixup */ - tmp = nir_fadd(b, tmp, - nir_fmul(b, - nir_b2f(b, nir_flt(b, one, abs_y_over_x)), - nir_fadd(b, nir_fmul(b, tmp, - nir_imm_float(b, -2.0f)), - nir_imm_float(b, M_PI_2f)))); - - /* sign fixup */ - return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); -} - -static nir_ssa_def * -build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) -{ - nir_ssa_def *zero = nir_imm_float(b, 0.0f); - - /* If |x| >= 1.0e-8 * |y|: */ - nir_ssa_def *condition = - nir_fge(b, nir_fabs(b, x), - nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); - - /* Then...call atan(y/x) and fix it up: */ - nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); - nir_ssa_def *r_then = - nir_bcsel(b, nir_flt(b, x, zero), - nir_fadd(b, atan1, - nir_bcsel(b, nir_fge(b, y, zero), - nir_imm_float(b, M_PIf), - nir_imm_float(b, -M_PIf))), - atan1); - - /* Else... */ - nir_ssa_def *r_else = - nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); - - return nir_bcsel(b, condition, r_then, r_else); -} - -static nir_ssa_def * -build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) -{ - nir_ssa_def *abs_x = nir_fabs(b, x); - nir_ssa_def *zero = nir_imm_float(b, 0.0f); - - /* Single-precision floating-point values are stored as - * 1 sign bit; - * 8 exponent bits; - * 23 mantissa bits. - * - * An exponent shift of 23 will shift the mantissa out, leaving only the - * exponent and sign bit (which itself may be zero, if the absolute value - * was taken before the bitcast and shift). - */ - nir_ssa_def *exponent_shift = nir_imm_int(b, 23); - nir_ssa_def *exponent_bias = nir_imm_int(b, -126); - - nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); - - /* Exponent of floating-point values in the range [0.5, 1.0).
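The bit manipulation in build_frexp() can be checked on the CPU: clear the exponent field, splice in the exponent of 0.5 so the mantissa lands in [0.5, 1.0), and read the biased exponent out of bits 30:23. frexp_bits is an illustrative helper mirroring the NIR sequence above:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static float
frexp_bits(float x, int *exponent)
{
   uint32_t bits;
   memcpy(&bits, &x, sizeof(bits));
   uint32_t abs_bits = bits & 0x7fffffffu;     /* nir_fabs */
   int nonzero = abs_bits != 0;
   /* ushr by 23, then add the -126 bias only for nonzero inputs */
   *exponent = (int)(abs_bits >> 23) + (nonzero ? -126 : 0);
   /* keep sign and mantissa, substitute the exponent of 0.5 */
   bits = (bits & 0x807fffffu) | (nonzero ? 0x3f000000u : 0);
   float r;
   memcpy(&r, &bits, sizeof(r));
   return r;
}

int main(void)
{
   int e;
   float m = frexp_bits(6.0f, &e);   /* 6.0 = 0.75 * 2^3 */
   assert(m == 0.75f && e == 3);
   return 0;
}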
*/ - nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); - - nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); - - *exponent = - nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), - nir_bcsel(b, is_not_zero, exponent_bias, zero)); - - return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), - nir_bcsel(b, is_not_zero, exponent_value, zero)); -} - -static nir_op -vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode) -{ - switch (opcode) { - case GLSLstd450Round: return nir_op_fround_even; - case GLSLstd450RoundEven: return nir_op_fround_even; - case GLSLstd450Trunc: return nir_op_ftrunc; - case GLSLstd450FAbs: return nir_op_fabs; - case GLSLstd450SAbs: return nir_op_iabs; - case GLSLstd450FSign: return nir_op_fsign; - case GLSLstd450SSign: return nir_op_isign; - case GLSLstd450Floor: return nir_op_ffloor; - case GLSLstd450Ceil: return nir_op_fceil; - case GLSLstd450Fract: return nir_op_ffract; - case GLSLstd450Sin: return nir_op_fsin; - case GLSLstd450Cos: return nir_op_fcos; - case GLSLstd450Pow: return nir_op_fpow; - case GLSLstd450Exp2: return nir_op_fexp2; - case GLSLstd450Log2: return nir_op_flog2; - case GLSLstd450Sqrt: return nir_op_fsqrt; - case GLSLstd450InverseSqrt: return nir_op_frsq; - case GLSLstd450FMin: return nir_op_fmin; - case GLSLstd450UMin: return nir_op_umin; - case GLSLstd450SMin: return nir_op_imin; - case GLSLstd450FMax: return nir_op_fmax; - case GLSLstd450UMax: return nir_op_umax; - case GLSLstd450SMax: return nir_op_imax; - case GLSLstd450FMix: return nir_op_flrp; - case GLSLstd450Fma: return nir_op_ffma; - case GLSLstd450Ldexp: return nir_op_ldexp; - case GLSLstd450FindILsb: return nir_op_find_lsb; - case GLSLstd450FindSMsb: return nir_op_ifind_msb; - case GLSLstd450FindUMsb: return nir_op_ufind_msb; - - /* Packing/Unpacking functions */ - case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8; - case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8; - case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16; - case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16; - case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16; - case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8; - case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; - case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; - case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; - case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; - - default: - unreachable("No NIR equivalent"); - } -} - -static void -handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, - const uint32_t *w, unsigned count) -{ - struct nir_builder *nb = &b->nb; - const struct glsl_type *dest_type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_create_ssa_value(b, dest_type); - - /* Collect the various SSA sources */ - unsigned num_inputs = count - 5; - nir_ssa_def *src[3] = { NULL, }; - for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5])->def; - - switch (entrypoint) { - case GLSLstd450Radians: - val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); - return; - case GLSLstd450Degrees: - val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); - return; - case GLSLstd450Tan: - val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), - nir_fcos(nb, src[0])); - return; - - case GLSLstd450Modf: { - nir_ssa_def *sign = nir_fsign(nb, src[0]); - nir_ssa_def *abs = nir_fabs(nb, src[0]); - 
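As an aside, the literal constants in the GLSLstd450Radians and GLSLstd450Degrees cases above are pi/180 and 180/pi truncated to float precision; a quick standalone check:

#include <assert.h>
#include <math.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

int main(void)
{
   assert(fabs(0.01745329251 - M_PI / 180.0) < 1e-11);  /* Radians */
   assert(fabs(57.2957795131 - 180.0 / M_PI) < 1e-9);   /* Degrees */
   return 0;
}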
val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); - nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), - nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); - return; - } - - case GLSLstd450ModfStruct: { - nir_ssa_def *sign = nir_fsign(nb, src[0]); - nir_ssa_def *abs = nir_fabs(nb, src[0]); - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); - val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); - return; - } - - case GLSLstd450Step: - val->ssa->def = nir_sge(nb, src[1], src[0]); - return; - - case GLSLstd450Length: - val->ssa->def = build_length(nb, src[0]); - return; - case GLSLstd450Distance: - val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); - return; - case GLSLstd450Normalize: - val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); - return; - - case GLSLstd450Exp: - val->ssa->def = build_exp(nb, src[0]); - return; - - case GLSLstd450Log: - val->ssa->def = build_log(nb, src[0]); - return; - - case GLSLstd450FClamp: - val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); - return; - case GLSLstd450UClamp: - val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); - return; - case GLSLstd450SClamp: - val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); - return; - - case GLSLstd450Cross: { - unsigned yzx[4] = { 1, 2, 0, 0 }; - unsigned zxy[4] = { 2, 0, 1, 0 }; - val->ssa->def = - nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), - nir_swizzle(nb, src[1], zxy, 3, true)), - nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), - nir_swizzle(nb, src[1], yzx, 3, true))); - return; - } - - case GLSLstd450SmoothStep: { - /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ - nir_ssa_def *t = - build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), - nir_fsub(nb, src[1], src[0])), - nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); - /* result = t * t * (3 - 2 * t) */ - val->ssa->def = - nir_fmul(nb, t, nir_fmul(nb, t, - nir_fsub(nb, nir_imm_float(nb, 3.0), - nir_fmul(nb, nir_imm_float(nb, 2.0), t)))); - return; - } - - case GLSLstd450FaceForward: - val->ssa->def = - nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), - nir_imm_float(nb, 0.0)), - src[0], nir_fneg(nb, src[0])); - return; - - case GLSLstd450Reflect: - /* I - 2 * dot(N, I) * N */ - val->ssa->def = - nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), - nir_fmul(nb, nir_fdot(nb, src[0], src[1]), - src[1]))); - return; - - case GLSLstd450Refract: { - nir_ssa_def *I = src[0]; - nir_ssa_def *N = src[1]; - nir_ssa_def *eta = src[2]; - nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); - nir_ssa_def *one = nir_imm_float(nb, 1.0); - nir_ssa_def *zero = nir_imm_float(nb, 0.0); - /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ - nir_ssa_def *k = - nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, - nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); - nir_ssa_def *result = - nir_fsub(nb, nir_fmul(nb, eta, I), - nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), - nir_fsqrt(nb, k)), N)); - /* XXX: bcsel, or if statement? 
*/ - val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); - return; - } - - case GLSLstd450Sinh: - /* 0.5 * (e^x - e^(-x)) */ - val->ssa->def = - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fsub(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))); - return; - - case GLSLstd450Cosh: - /* 0.5 * (e^x + e^(-x)) */ - val->ssa->def = - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fadd(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))); - return; - - case GLSLstd450Tanh: - /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ - val->ssa->def = - nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fsub(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))), - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fadd(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0]))))); - return; - - case GLSLstd450Asinh: - val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), - build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), - nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), - nir_imm_float(nb, 1.0f)))))); - return; - case GLSLstd450Acosh: - val->ssa->def = build_log(nb, nir_fadd(nb, src[0], - nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), - nir_imm_float(nb, 1.0f))))); - return; - case GLSLstd450Atanh: { - nir_ssa_def *one = nir_imm_float(nb, 1.0); - val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f), - build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), - nir_fsub(nb, one, src[0])))); - return; - } - - case GLSLstd450Asin: - val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); - return; - - case GLSLstd450Acos: - val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), - build_asin(nb, src[0], 0.08132463, -0.02363318)); - return; - - case GLSLstd450Atan: - val->ssa->def = build_atan(nb, src[0]); - return; - - case GLSLstd450Atan2: - val->ssa->def = build_atan2(nb, src[0], src[1]); - return; - - case GLSLstd450Frexp: { - nir_ssa_def *exponent; - val->ssa->def = build_frexp(nb, src[0], &exponent); - nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); - return; - } - - case GLSLstd450FrexpStruct: { - assert(glsl_type_is_struct(val->ssa->type)); - val->ssa->elems[0]->def = build_frexp(nb, src[0], - &val->ssa->elems[1]->def); - return; - } - - default: - val->ssa->def = - nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint), - src[0], src[1], src[2], NULL); - return; - } -} - -bool -vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, - const uint32_t *w, unsigned count) -{ - switch ((enum GLSLstd450)ext_opcode) { - case GLSLstd450Determinant: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); - break; - } - - case GLSLstd450MatrixInverse: { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); - break; - } - - case GLSLstd450InterpolateAtCentroid: - case GLSLstd450InterpolateAtSample: - case GLSLstd450InterpolateAtOffset: - unreachable("Unhandled opcode"); - - default: - handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); - } - - return true; -} diff --git a/src/compiler/nir/spirv/vtn_private.h b/src/compiler/nir/spirv/vtn_private.h deleted file mode 100644 index 3840d8c4b65..00000000000 --- a/src/compiler/nir/spirv/vtn_private.h +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright © 2015 
Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir/nir.h" -#include "nir/nir_builder.h" -#include "nir/nir_array.h" -#include "nir_spirv.h" -#include "spirv.h" - -struct vtn_builder; -struct vtn_decoration; - -enum vtn_value_type { - vtn_value_type_invalid = 0, - vtn_value_type_undef, - vtn_value_type_string, - vtn_value_type_decoration_group, - vtn_value_type_type, - vtn_value_type_constant, - vtn_value_type_access_chain, - vtn_value_type_function, - vtn_value_type_block, - vtn_value_type_ssa, - vtn_value_type_extension, - vtn_value_type_image_pointer, - vtn_value_type_sampled_image, -}; - -enum vtn_branch_type { - vtn_branch_type_none, - vtn_branch_type_switch_break, - vtn_branch_type_switch_fallthrough, - vtn_branch_type_loop_break, - vtn_branch_type_loop_continue, - vtn_branch_type_discard, - vtn_branch_type_return, -}; - -enum vtn_cf_node_type { - vtn_cf_node_type_block, - vtn_cf_node_type_if, - vtn_cf_node_type_loop, - vtn_cf_node_type_switch, -}; - -struct vtn_cf_node { - struct list_head link; - enum vtn_cf_node_type type; -}; - -struct vtn_loop { - struct vtn_cf_node node; - - /* The main body of the loop */ - struct list_head body; - - /* The "continue" part of the loop. This gets executed after the body - * and is where you go when you hit a continue. 
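A hypothetical consumer of these CF lists would walk them recursively; a minimal sketch using Mesa's util/list.h iterator (walk_cf_list is illustrative, not part of vtn):

   static void
   walk_cf_list(struct list_head *cf_list)
   {
      list_for_each_entry(struct vtn_cf_node, node, cf_list, link) {
         switch (node->type) {
         case vtn_cf_node_type_block:
            /* straight-line instructions */
            break;
         case vtn_cf_node_type_loop: {
            struct vtn_loop *loop = (struct vtn_loop *)node;
            walk_cf_list(&loop->body);      /* main body */
            walk_cf_list(&loop->cont_body); /* continue construct */
            break;
         }
         default:
            /* if/switch nodes recurse into their sub-lists similarly */
            break;
         }
      }
   }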
- */ - struct list_head cont_body; - - SpvLoopControlMask control; -}; - -struct vtn_if { - struct vtn_cf_node node; - - uint32_t condition; - - enum vtn_branch_type then_type; - struct list_head then_body; - - enum vtn_branch_type else_type; - struct list_head else_body; - - SpvSelectionControlMask control; -}; - -struct vtn_case { - struct list_head link; - - struct list_head body; - - /* The block that starts this case */ - struct vtn_block *start_block; - - /* The fallthrough case, if any */ - struct vtn_case *fallthrough; - - /* The uint32_t values that map to this case */ - nir_array values; - - /* True if this is the default case */ - bool is_default; - - /* Initialized to false; used when sorting the list of cases */ - bool visited; -}; - -struct vtn_switch { - struct vtn_cf_node node; - - uint32_t selector; - - struct list_head cases; -}; - -struct vtn_block { - struct vtn_cf_node node; - - /** A pointer to the label instruction */ - const uint32_t *label; - - /** A pointer to the merge instruction (or NULL if none exists) */ - const uint32_t *merge; - - /** A pointer to the branch instruction that ends this block */ - const uint32_t *branch; - - enum vtn_branch_type branch_type; - - /** Points to the loop that this block starts (if it starts a loop) */ - struct vtn_loop *loop; - - /** Points to the switch case started by this block (if any) */ - struct vtn_case *switch_case; - - /** The last block in this SPIR-V block. */ - nir_block *end_block; -}; - -struct vtn_function { - struct exec_node node; - - nir_function_impl *impl; - struct vtn_block *start_block; - - struct list_head body; - - const uint32_t *end; - - SpvFunctionControlMask control; -}; - -typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, - const uint32_t *, unsigned); - -void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, - const uint32_t *end); -void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, - vtn_instruction_handler instruction_handler); - -const uint32_t * -vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, - const uint32_t *end, vtn_instruction_handler handler); - -struct vtn_ssa_value { - union { - nir_ssa_def *def; - struct vtn_ssa_value **elems; - }; - - /* For matrices, if this is non-NULL, then this value is actually the - * transpose of some other value. The value that `transposed` points to - * always dominates this value. - */ - struct vtn_ssa_value *transposed; - - const struct glsl_type *type; -}; - -struct vtn_type { - const struct glsl_type *type; - - /* The value that declares this type. Used for finding decorations */ - struct vtn_value *val; - - /* for matrices, whether the matrix is stored row-major */ - bool row_major; - - /* for structs, the offset of each member */ - unsigned *offsets; - - /* for structs, whether it was decorated as a "non-SSBO-like" block */ - bool block; - - /* for structs, whether it was decorated as an "SSBO-like" block */ - bool buffer_block; - - /* for structs with block == true, whether this is a builtin block (i.e. a - * block that contains only builtins).
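To make the layout fields in this struct concrete, a worked std140 example (illustrative values, assuming the usual decorations):

   /* layout(std140) uniform S { vec4 a; mat4 b; };
    *   OpMemberDecorate %S 0 Offset 0        ->  s->offsets[0] == 0
    *   OpMemberDecorate %S 1 Offset 16       ->  s->offsets[1] == 16
    *   OpMemberDecorate %S 1 MatrixStride 16 ->  s->members[1]->stride == 16
    *   OpDecorate %S Block                   ->  s->block == true
    */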
- */ - bool builtin_block; - - /* Image format for image_load_store type images */ - unsigned image_format; - - /* Access qualifier for storage images */ - SpvAccessQualifier access_qualifier; - - /* for arrays and matrices, the array stride */ - unsigned stride; - - /* for arrays, the vtn_type for the elements of the array */ - struct vtn_type *array_element; - - /* for structures, the vtn_type for each member */ - struct vtn_type **members; - - /* Whether this type, or a parent type, has been decorated as a builtin */ - bool is_builtin; - - SpvBuiltIn builtin; -}; - -struct vtn_variable; - -enum vtn_access_mode { - vtn_access_mode_id, - vtn_access_mode_literal, -}; - -struct vtn_access_link { - enum vtn_access_mode mode; - uint32_t id; -}; - -struct vtn_access_chain { - struct vtn_variable *var; - - uint32_t length; - - /* Struct elements and array offsets */ - struct vtn_access_link link[0]; -}; - -enum vtn_variable_mode { - vtn_variable_mode_local, - vtn_variable_mode_global, - vtn_variable_mode_param, - vtn_variable_mode_ubo, - vtn_variable_mode_ssbo, - vtn_variable_mode_push_constant, - vtn_variable_mode_image, - vtn_variable_mode_sampler, - vtn_variable_mode_workgroup, - vtn_variable_mode_input, - vtn_variable_mode_output, -}; - -struct vtn_variable { - enum vtn_variable_mode mode; - - struct vtn_type *type; - - unsigned descriptor_set; - unsigned binding; - - nir_variable *var; - nir_variable **members; - - struct vtn_access_chain chain; -}; - -struct vtn_image_pointer { - struct vtn_access_chain *image; - nir_ssa_def *coord; - nir_ssa_def *sample; -}; - -struct vtn_sampled_image { - struct vtn_access_chain *image; /* Image or array of images */ - struct vtn_access_chain *sampler; /* Sampler */ -}; - -struct vtn_value { - enum vtn_value_type value_type; - const char *name; - struct vtn_decoration *decoration; - union { - void *ptr; - char *str; - struct vtn_type *type; - struct { - nir_constant *constant; - const struct glsl_type *const_type; - }; - struct vtn_access_chain *access_chain; - struct vtn_image_pointer *image; - struct vtn_sampled_image *sampled_image; - struct vtn_function *func; - struct vtn_block *block; - struct vtn_ssa_value *ssa; - vtn_instruction_handler ext_handler; - }; -}; - -#define VTN_DEC_DECORATION -1 -#define VTN_DEC_EXECUTION_MODE -2 -#define VTN_DEC_STRUCT_MEMBER0 0 - -struct vtn_decoration { - struct vtn_decoration *next; - - /* Specifies how to apply this decoration. Negative values represent a - * decoration or execution mode. (See the VTN_DEC_ #defines above.) - * Non-negative values specify that it applies to a structure member. - */ - int scope; - - const uint32_t *literals; - struct vtn_value *group; - - union { - SpvDecoration decoration; - SpvExecutionMode exec_mode; - }; -}; - -struct vtn_builder { - nir_builder nb; - - nir_shader *shader; - nir_function_impl *impl; - struct vtn_block *block; - - /* Current file, line, and column. Useful for debugging. Set - * automatically by vtn_foreach_instruction. - */ - char *file; - int line, col; - - /* - * In SPIR-V, constants are global, whereas in NIR, the load_const - * instruction we use is per-function. So while we parse each function, we - * keep a hash table of constants we've resolved to nir_ssa_value's so - * far, and we lazily resolve them when we see them used in a function. - */ - struct hash_table *const_table; - - /* - * Map from phi instructions (pointer to the start of the instruction) - * to the variable corresponding to it. 
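Downstream of these tables, the per-id value array declared just below is the workhorse; a typical opcode handler uses it roughly like this (sketch; result_id and operand_id stand in for words of the instruction):

   struct vtn_value *val = vtn_push_value(b, result_id, vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, glsl_float_type());
   val->ssa->def = nir_fadd(&b->nb, vtn_ssa_value(b, operand_id)->def,
                            nir_imm_float(&b->nb, 1.0f));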
- */ - struct hash_table *phi_table; - - unsigned num_specializations; - struct nir_spirv_specialization *specializations; - - unsigned value_id_bound; - struct vtn_value *values; - - gl_shader_stage entry_point_stage; - const char *entry_point_name; - struct vtn_value *entry_point; - bool origin_upper_left; - - struct vtn_function *func; - struct exec_list functions; - - /* Current function parameter index */ - unsigned func_param_idx; - - bool has_loop_continue; -}; - -static inline struct vtn_value * -vtn_push_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - assert(value_id < b->value_id_bound); - assert(b->values[value_id].value_type == vtn_value_type_invalid); - - b->values[value_id].value_type = value_type; - - return &b->values[value_id]; -} - -static inline struct vtn_value * -vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) -{ - assert(value_id < b->value_id_bound); - return &b->values[value_id]; -} - -static inline struct vtn_value * -vtn_value(struct vtn_builder *b, uint32_t value_id, - enum vtn_value_type value_type) -{ - struct vtn_value *val = vtn_untyped_value(b, value_id); - assert(val->value_type == value_type); - return val; -} - -struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); - -struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, - const struct glsl_type *type); - -struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, - struct vtn_ssa_value *src); - -nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, - unsigned index); -nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *index); -nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, unsigned index); -nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, - nir_ssa_def *insert, nir_ssa_def *index); - -nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); - -nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, - struct vtn_access_chain *chain); -nir_ssa_def * -vtn_access_chain_to_offset(struct vtn_builder *b, - struct vtn_access_chain *chain, - nir_ssa_def **index_out, struct vtn_type **type_out, - unsigned *end_idx_out, bool stop_at_matrix); - -struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); - -void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest); - -struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); - -void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dest); - -void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count); - - -typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, - struct vtn_value *, - int member, - const struct vtn_decoration *, - void *); - -void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, - vtn_decoration_foreach_cb cb, void *data); - -typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, - struct vtn_value *, - const struct vtn_decoration *, - void *); - -void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, - vtn_execution_mode_foreach_cb cb, void *data); - -nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); - -void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count); - -bool vtn_handle_glsl450_instruction(struct 
vtn_builder *b, uint32_t ext_opcode, - const uint32_t *words, unsigned count); diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c deleted file mode 100644 index 3cbac1e5da8..00000000000 --- a/src/compiler/nir/spirv/vtn_variables.c +++ /dev/null @@ -1,1415 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "vtn_private.h" - -static struct vtn_access_chain * -vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, - unsigned new_ids) -{ - struct vtn_access_chain *chain; - - unsigned new_len = old->length + new_ids; - chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); - - chain->var = old->var; - chain->length = new_len; - - for (unsigned i = 0; i < old->length; i++) - chain->link[i] = old->link[i]; - - return chain; -} - -static nir_ssa_def * -vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, - unsigned stride) -{ - assert(stride > 0); - if (link.mode == vtn_access_mode_literal) { - return nir_imm_int(&b->nb, link.id * stride); - } else if (stride == 1) { - return vtn_ssa_value(b, link.id)->def; - } else { - return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, - nir_imm_int(&b->nb, stride)); - } -} - -static struct vtn_type * -vtn_access_chain_tail_type(struct vtn_builder *b, - struct vtn_access_chain *chain) -{ - struct vtn_type *type = chain->var->type; - for (unsigned i = 0; i < chain->length; i++) { - if (glsl_type_is_struct(type->type)) { - assert(chain->link[i].mode == vtn_access_mode_literal); - type = type->members[chain->link[i].id]; - } else { - type = type->array_element; - } - } - return type; -} - -/* Crawls a chain of array derefs and rewrites the types so that the - * lengths stay the same but the terminal type is the one given by - * tail_type. This is useful for split structures. 
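For instance (illustrative), a geometry-shader input declared as a per-vertex array of struct { vec4 pos; float psz; } is split into separate pos[3] and psz[3] variables; once a deref chain is re-pointed at the pos member variable, the array derefs along it must be retyped from the struct-array element type to vec4, which is what the recursion below does.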
- */ -static void -rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) -{ - deref->type = type; - if (deref->child) { - assert(deref->child->deref_type == nir_deref_type_array); - assert(glsl_type_is_array(deref->type)); - rewrite_deref_types(deref->child, glsl_get_array_element(type)); - } -} - -nir_deref_var * -vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) -{ - nir_deref_var *deref_var; - if (chain->var->var) { - deref_var = nir_deref_var_create(b, chain->var->var); - } else { - assert(chain->var->members); - /* Create the deref_var manually. It will get filled out later. */ - deref_var = rzalloc(b, nir_deref_var); - deref_var->deref.deref_type = nir_deref_type_var; - } - - struct vtn_type *deref_type = chain->var->type; - nir_deref *tail = &deref_var->deref; - nir_variable **members = chain->var->members; - - for (unsigned i = 0; i < chain->length; i++) { - enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_ARRAY: { - deref_type = deref_type->array_element; - - nir_deref_array *deref_arr = nir_deref_array_create(b); - deref_arr->deref.type = deref_type->type; - - if (chain->link[i].mode == vtn_access_mode_literal) { - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->base_offset = chain->link[i].id; - } else { - assert(chain->link[i].mode == vtn_access_mode_id); - deref_arr->deref_array_type = nir_deref_array_type_indirect; - deref_arr->base_offset = 0; - deref_arr->indirect = - nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); - } - tail->child = &deref_arr->deref; - tail = tail->child; - break; - } - - case GLSL_TYPE_STRUCT: { - assert(chain->link[i].mode == vtn_access_mode_literal); - unsigned idx = chain->link[i].id; - deref_type = deref_type->members[idx]; - if (members) { - /* This is a pre-split structure. */ - deref_var->var = members[idx]; - rewrite_deref_types(&deref_var->deref, members[idx]->type); - assert(tail->type == deref_type->type); - members = NULL; - } else { - nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); - deref_struct->deref.type = deref_type->type; - tail->child = &deref_struct->deref; - tail = tail->child; - } - break; - } - default: - unreachable("Invalid type for deref"); - } - } - - assert(members == NULL); - return deref_var; -} - -static void -_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, - nir_deref *tail, struct vtn_ssa_value *inout) -{ - /* The deref tail may contain a deref to select a component of a vector (in - * other words, it might not be an actual tail) so we have to save it away - * here since we overwrite it later. - */ - nir_deref *old_child = tail->child; - - if (glsl_type_is_vector_or_scalar(tail->type)) { - /* Terminate the deref chain in case there is one more link to pick - * off a component of the vector. - */ - tail->child = NULL; - - nir_intrinsic_op op = load ? 
nir_intrinsic_load_var : - nir_intrinsic_store_var; - - nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); - intrin->variables[0] = - nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); - intrin->num_components = glsl_get_vector_elements(tail->type); - - if (load) { - nir_ssa_dest_init(&intrin->instr, &intrin->dest, - intrin->num_components, - glsl_get_bit_size(glsl_get_base_type(tail->type)), - NULL); - inout->def = &intrin->dest.ssa; - } else { - nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1); - intrin->src[0] = nir_src_for_ssa(inout->def); - } - - nir_builder_instr_insert(&b->nb, &intrin->instr); - } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || - glsl_type_is_matrix(tail->type)) { - unsigned elems = glsl_get_length(tail->type); - nir_deref_array *deref_arr = nir_deref_array_create(b); - deref_arr->deref_array_type = nir_deref_array_type_direct; - deref_arr->deref.type = glsl_get_array_element(tail->type); - tail->child = &deref_arr->deref; - for (unsigned i = 0; i < elems; i++) { - deref_arr->base_offset = i; - _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); - } - } else { - assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); - unsigned elems = glsl_get_length(tail->type); - nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); - tail->child = &deref_struct->deref; - for (unsigned i = 0; i < elems; i++) { - deref_struct->index = i; - deref_struct->deref.type = glsl_get_struct_field(tail->type, i); - _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); - } - } - - tail->child = old_child; -} - -nir_deref_var * -vtn_nir_deref(struct vtn_builder *b, uint32_t id) -{ - struct vtn_access_chain *chain = - vtn_value(b, id, vtn_value_type_access_chain)->access_chain; - - return vtn_access_chain_to_deref(b, chain); -} - -/* - * Gets the NIR-level deref tail, which may have as a child an array deref - * selecting which component due to OpAccessChain supporting per-component - * indexing in SPIR-V. 
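For example (illustrative ids): an OpAccessChain that ends in a component index into a local vec4 stops here at the vec4 deref, and vtn_local_load below then loads the whole vector and picks off the component:

   /* direct (literal) component select, e.g. .y */
   val->def = vtn_vector_extract(b, val->def, 1);
   /* or, for a dynamically indexed component (index_ssa is illustrative) */
   val->def = vtn_vector_extract_dynamic(b, val->def, index_ssa);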
- */ -static nir_deref * -get_deref_tail(nir_deref_var *deref) -{ - nir_deref *cur = &deref->deref; - while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) - cur = cur->child; - - return cur; -} - -struct vtn_ssa_value * -vtn_local_load(struct vtn_builder *b, nir_deref_var *src) -{ - nir_deref *src_tail = get_deref_tail(src); - struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); - _vtn_local_load_store(b, true, src, src_tail, val); - - if (src_tail->child) { - nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); - assert(vec_deref->deref.child == NULL); - val->type = vec_deref->deref.type; - if (vec_deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); - else - val->def = vtn_vector_extract_dynamic(b, val->def, - vec_deref->indirect.ssa); - } - - return val; -} - -void -vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, - nir_deref_var *dest) -{ - nir_deref *dest_tail = get_deref_tail(dest); - - if (dest_tail->child) { - struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); - _vtn_local_load_store(b, true, dest, dest_tail, val); - nir_deref_array *deref = nir_deref_as_array(dest_tail->child); - assert(deref->deref.child == NULL); - if (deref->deref_array_type == nir_deref_array_type_direct) - val->def = vtn_vector_insert(b, val->def, src->def, - deref->base_offset); - else - val->def = vtn_vector_insert_dynamic(b, val->def, src->def, - deref->indirect.ssa); - _vtn_local_load_store(b, false, dest, dest_tail, val); - } else { - _vtn_local_load_store(b, false, dest, dest_tail, src); - } -} - -static nir_ssa_def * -get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, - struct vtn_type **type, unsigned *chain_idx) -{ - /* Push constants have no explicit binding */ - if (chain->var->mode == vtn_variable_mode_push_constant) { - *chain_idx = 0; - *type = chain->var->type; - return NULL; - } - - nir_ssa_def *array_index; - if (glsl_type_is_array(chain->var->type->type)) { - assert(chain->length > 0); - array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); - *chain_idx = 1; - *type = chain->var->type->array_element; - } else { - array_index = nir_imm_int(&b->nb, 0); - *chain_idx = 0; - *type = chain->var->type; - } - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->nb.shader, - nir_intrinsic_vulkan_resource_index); - instr->src[0] = nir_src_for_ssa(array_index); - nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set); - nir_intrinsic_set_binding(instr, chain->var->binding); - - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); - nir_builder_instr_insert(&b->nb, &instr->instr); - - return &instr->dest.ssa; -} - -nir_ssa_def * -vtn_access_chain_to_offset(struct vtn_builder *b, - struct vtn_access_chain *chain, - nir_ssa_def **index_out, struct vtn_type **type_out, - unsigned *end_idx_out, bool stop_at_matrix) -{ - unsigned idx = 0; - struct vtn_type *type; - *index_out = get_vulkan_resource_index(b, chain, &type, &idx); - - nir_ssa_def *offset = nir_imm_int(&b->nb, 0); - for (; idx < chain->length; idx++) { - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - /* Some users may not want matrix or vector derefs */ - if (stop_at_matrix) - goto end; - /* Fall through */ - - case GLSL_TYPE_ARRAY: - offset = nir_iadd(&b->nb, offset, - 
vtn_access_link_as_ssa(b, chain->link[idx], - type->stride)); - - type = type->array_element; - break; - - case GLSL_TYPE_STRUCT: { - assert(chain->link[idx].mode == vtn_access_mode_literal); - unsigned member = chain->link[idx].id; - offset = nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, type->offsets[member])); - type = type->members[member]; - break; - } - - default: - unreachable("Invalid type for deref"); - } - } - -end: - *type_out = type; - if (end_idx_out) - *end_idx_out = idx; - - return offset; -} - -static void -_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, - struct vtn_ssa_value **inout, const struct glsl_type *type) -{ - nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); - instr->num_components = glsl_get_vector_elements(type); - - int src = 0; - if (!load) { - nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1); - instr->src[src++] = nir_src_for_ssa((*inout)->def); - } - - /* We set the base and size for push constant load to the entire push - * constant block for now. - */ - if (op == nir_intrinsic_load_push_constant) { - nir_intrinsic_set_base(instr, 0); - nir_intrinsic_set_range(instr, 128); - } - - if (index) - instr->src[src++] = nir_src_for_ssa(index); - - instr->src[src++] = nir_src_for_ssa(offset); - - if (load) { - nir_ssa_dest_init(&instr->instr, &instr->dest, - instr->num_components, - glsl_get_bit_size(glsl_get_base_type(type)), NULL); - (*inout)->def = &instr->dest.ssa; - } - - nir_builder_instr_insert(&b->nb, &instr->instr); - - if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) - (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); -} - -static void -_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, - nir_ssa_def *index, nir_ssa_def *offset, - struct vtn_access_chain *chain, unsigned chain_idx, - struct vtn_type *type, struct vtn_ssa_value **inout) -{ - if (chain && chain_idx >= chain->length) - chain = NULL; - - if (load && chain == NULL && *inout == NULL) - *inout = vtn_create_ssa_value(b, type->type); - - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* This is where things get interesting. At this point, we've hit - * a vector, a scalar, or a matrix. - */ - if (glsl_type_is_matrix(type->type)) { - if (chain == NULL) { - /* Loading the whole matrix */ - struct vtn_ssa_value *transpose; - unsigned num_ops, vec_width; - if (type->row_major) { - num_ops = glsl_get_vector_elements(type->type); - vec_width = glsl_get_matrix_columns(type->type); - if (load) { - const struct glsl_type *transpose_type = - glsl_matrix_type(base_type, vec_width, num_ops); - *inout = vtn_create_ssa_value(b, transpose_type); - } else { - transpose = vtn_ssa_transpose(b, *inout); - inout = &transpose; - } - } else { - num_ops = glsl_get_matrix_columns(type->type); - vec_width = glsl_get_vector_elements(type->type); - } - - for (unsigned i = 0; i < num_ops; i++) { - nir_ssa_def *elem_offset = - nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, i * type->stride)); - _vtn_load_store_tail(b, op, load, index, elem_offset, - &(*inout)->elems[i], - glsl_vector_type(base_type, vec_width)); - } - - if (load && type->row_major) - *inout = vtn_ssa_transpose(b, *inout); - } else if (type->row_major) { - /* Row-major but with an access chain.
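As a worked example (illustrative layout, assuming the column type's stride is the 4-byte component size): for a row_major mat4 with MatrixStride 16, element m[2][1] (column 2, row 1) resolves to 2 * 4 + 1 * 16 == 24 bytes into the matrix, which is exactly the col_offset + row_offset split computed below.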
*/ - nir_ssa_def *col_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx], - type->array_element->stride); - offset = nir_iadd(&b->nb, offset, col_offset); - - if (chain_idx + 1 < chain->length) { - /* Picking off a single element */ - nir_ssa_def *row_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], - type->stride); - offset = nir_iadd(&b->nb, offset, row_offset); - if (load) - *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); - _vtn_load_store_tail(b, op, load, index, offset, inout, - glsl_scalar_type(base_type)); - } else { - /* Grabbing a column; picking one element off each row */ - unsigned num_comps = glsl_get_vector_elements(type->type); - const struct glsl_type *column_type = - glsl_get_column_type(type->type); - - nir_ssa_def *comps[4]; - for (unsigned i = 0; i < num_comps; i++) { - nir_ssa_def *elem_offset = - nir_iadd(&b->nb, offset, - nir_imm_int(&b->nb, i * type->stride)); - - struct vtn_ssa_value *comp, temp_val; - if (!load) { - temp_val.def = nir_channel(&b->nb, (*inout)->def, i); - temp_val.type = glsl_scalar_type(base_type); - } - comp = &temp_val; - _vtn_load_store_tail(b, op, load, index, elem_offset, - &comp, glsl_scalar_type(base_type)); - comps[i] = comp->def; - } - - if (load) { - if (*inout == NULL) - *inout = vtn_create_ssa_value(b, column_type); - - (*inout)->def = nir_vec(&b->nb, comps, num_comps); - } - } - } else { - /* Column-major with a deref. Fall through to array case. */ - nir_ssa_def *col_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); - offset = nir_iadd(&b->nb, offset, col_offset); - - _vtn_block_load_store(b, op, load, index, offset, - chain, chain_idx + 1, - type->array_element, inout); - } - } else if (chain == NULL) { - /* Single whole vector */ - assert(glsl_type_is_vector_or_scalar(type->type)); - _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); - } else { - /* Single component of a vector. Fall through to array case. 
*/ - nir_ssa_def *elem_offset = - vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); - offset = nir_iadd(&b->nb, offset, elem_offset); - - _vtn_block_load_store(b, op, load, index, offset, NULL, 0, - type->array_element, inout); - } - return; - - case GLSL_TYPE_ARRAY: { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, - type->array_element, &(*inout)->elems[i]); - } - return; - } - - case GLSL_TYPE_STRUCT: { - unsigned elems = glsl_get_length(type->type); - for (unsigned i = 0; i < elems; i++) { - nir_ssa_def *elem_off = - nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); - _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, - type->members[i], &(*inout)->elems[i]); - } - return; - } - - default: - unreachable("Invalid block member type"); - } -} - -static struct vtn_ssa_value * -vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) -{ - nir_intrinsic_op op; - switch (src->var->mode) { - case vtn_variable_mode_ubo: - op = nir_intrinsic_load_ubo; - break; - case vtn_variable_mode_ssbo: - op = nir_intrinsic_load_ssbo; - break; - case vtn_variable_mode_push_constant: - op = nir_intrinsic_load_push_constant; - break; - default: - assert(!"Invalid block variable mode"); - } - - nir_ssa_def *offset, *index = NULL; - struct vtn_type *type; - unsigned chain_idx; - offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); - - struct vtn_ssa_value *value = NULL; - _vtn_block_load_store(b, op, true, index, offset, - src, chain_idx, type, &value); - return value; -} - -static void -vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dst) -{ - nir_ssa_def *offset, *index = NULL; - struct vtn_type *type; - unsigned chain_idx; - offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); - - _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, - dst, chain_idx, type, &src); -} - -static bool -vtn_variable_is_external_block(struct vtn_variable *var) -{ - return var->mode == vtn_variable_mode_ssbo || - var->mode == vtn_variable_mode_ubo || - var->mode == vtn_variable_mode_push_constant; -} - -static void -_vtn_variable_load_store(struct vtn_builder *b, bool load, - struct vtn_access_chain *chain, - struct vtn_type *tail_type, - struct vtn_ssa_value **inout) -{ - enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* At this point, we have a scalar, vector, or matrix so we know that - * there cannot be any structure splitting still in the way. By - * stopping at the matrix level rather than the vector level, we - * ensure that matrices get loaded in the optimal way even if they - * are stored row-major in a UBO.
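Concretely (illustrative): in the block path earlier in this file, a whole row_major mat4 becomes four vec4 loads (one per 16-byte row) plus a transpose instead of sixteen scalar loads; stopping the recursion at matrix granularity here preserves that same opportunity.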
- */ - if (load) { - *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); - } else { - vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); - } - return; - - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: { - struct vtn_access_chain *new_chain = - vtn_access_chain_extend(b, chain, 1); - new_chain->link[chain->length].mode = vtn_access_mode_literal; - unsigned elems = glsl_get_length(tail_type->type); - if (load) { - assert(*inout == NULL); - *inout = rzalloc(b, struct vtn_ssa_value); - (*inout)->type = tail_type->type; - (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); - } - for (unsigned i = 0; i < elems; i++) { - new_chain->link[chain->length].id = i; - struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? - tail_type->array_element : tail_type->members[i]; - _vtn_variable_load_store(b, load, new_chain, elem_type, - &(*inout)->elems[i]); - } - return; - } - - default: - unreachable("Invalid access chain type"); - } -} - -struct vtn_ssa_value * -vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) -{ - if (vtn_variable_is_external_block(src->var)) { - return vtn_block_load(b, src); - } else { - struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); - struct vtn_ssa_value *val = NULL; - _vtn_variable_load_store(b, true, src, tail_type, &val); - return val; - } -} - -void -vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, - struct vtn_access_chain *dest) -{ - if (vtn_variable_is_external_block(dest->var)) { - assert(dest->var->mode == vtn_variable_mode_ssbo); - vtn_block_store(b, src, dest); - } else { - struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); - _vtn_variable_load_store(b, false, dest, tail_type, &src); - } -} - -static void -_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, - struct vtn_access_chain *src, struct vtn_type *tail_type) -{ - enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - /* At this point, we have a scalar, vector, or matrix so we know that - * there cannot be any structure splitting still in the way. By - * stopping at the matrix level rather than the vector level, we - * ensure that matrices get loaded in the optimal way even if they - * are stored row-major in a UBO. - */ - vtn_variable_store(b, vtn_variable_load(b, src), dest); - return; - - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: { - struct vtn_access_chain *new_src, *new_dest; - new_src = vtn_access_chain_extend(b, src, 1); - new_dest = vtn_access_chain_extend(b, dest, 1); - new_src->link[src->length].mode = vtn_access_mode_literal; - new_dest->link[dest->length].mode = vtn_access_mode_literal; - unsigned elems = glsl_get_length(tail_type->type); - for (unsigned i = 0; i < elems; i++) { - new_src->link[src->length].id = i; - new_dest->link[dest->length].id = i; - struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
- tail_type->array_element : tail_type->members[i]; - _vtn_variable_copy(b, new_dest, new_src, elem_type); - } - return; - } - - default: - unreachable("Invalid access chain type"); - } -} - -static void -vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, - struct vtn_access_chain *src) -{ - struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); - assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); - - /* TODO: At some point, we should add a special-case for when we can - * just emit a copy_var intrinsic. - */ - _vtn_variable_copy(b, dest, src, tail_type); -} - -static void -set_mode_system_value(nir_variable_mode *mode) -{ - assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); - *mode = nir_var_system_value; -} - -static void -vtn_get_builtin_location(struct vtn_builder *b, - SpvBuiltIn builtin, int *location, - nir_variable_mode *mode) -{ - switch (builtin) { - case SpvBuiltInPosition: - *location = VARYING_SLOT_POS; - break; - case SpvBuiltInPointSize: - *location = VARYING_SLOT_PSIZ; - break; - case SpvBuiltInClipDistance: - *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ - break; - case SpvBuiltInCullDistance: - /* XXX figure this out */ - break; - case SpvBuiltInVertexIndex: - *location = SYSTEM_VALUE_VERTEX_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInVertexId: - /* Vulkan defines VertexID to be zero-based and reserves the new - * builtin keyword VertexIndex to indicate the non-zero-based value. - */ - *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - set_mode_system_value(mode); - break; - case SpvBuiltInInstanceIndex: - *location = SYSTEM_VALUE_INSTANCE_INDEX; - set_mode_system_value(mode); - break; - case SpvBuiltInInstanceId: - *location = SYSTEM_VALUE_INSTANCE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInPrimitiveId: - *location = VARYING_SLOT_PRIMITIVE_ID; - *mode = nir_var_shader_out; - break; - case SpvBuiltInInvocationId: - *location = SYSTEM_VALUE_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLayer: - *location = VARYING_SLOT_LAYER; - *mode = nir_var_shader_out; - break; - case SpvBuiltInViewportIndex: - *location = VARYING_SLOT_VIEWPORT; - if (b->shader->stage == MESA_SHADER_GEOMETRY) - *mode = nir_var_shader_out; - else if (b->shader->stage == MESA_SHADER_FRAGMENT) - *mode = nir_var_shader_in; - else - unreachable("invalid stage for SpvBuiltInViewportIndex"); - break; - case SpvBuiltInTessLevelOuter: - case SpvBuiltInTessLevelInner: - case SpvBuiltInTessCoord: - case SpvBuiltInPatchVertices: - unreachable("no tessellation support"); - case SpvBuiltInFragCoord: - *location = VARYING_SLOT_POS; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInPointCoord: - *location = VARYING_SLOT_PNTC; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInFrontFacing: - *location = VARYING_SLOT_FACE; - assert(*mode == nir_var_shader_in); - break; - case SpvBuiltInSampleId: - *location = SYSTEM_VALUE_SAMPLE_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInSamplePosition: - *location = SYSTEM_VALUE_SAMPLE_POS; - set_mode_system_value(mode); - break; - case SpvBuiltInSampleMask: - *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ - set_mode_system_value(mode); - break; - case SpvBuiltInFragDepth: - *location = FRAG_RESULT_DEPTH; - assert(*mode == nir_var_shader_out); - break; - case SpvBuiltInNumWorkgroups: - *location = SYSTEM_VALUE_NUM_WORK_GROUPS; - set_mode_system_value(mode); - break; - case SpvBuiltInWorkgroupSize: - /* This should already be handled */ - unreachable("unsupported builtin"); - break; - case SpvBuiltInWorkgroupId: - *location = SYSTEM_VALUE_WORK_GROUP_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationId: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInLocalInvocationIndex: - *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; - set_mode_system_value(mode); - break; - case SpvBuiltInGlobalInvocationId: - *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; - set_mode_system_value(mode); - break; - case SpvBuiltInHelperInvocation: - default: - unreachable("unsupported builtin"); - } -} - -static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_var) -{ - struct vtn_variable *vtn_var = void_var; - - /* Handle decorations that apply to a vtn_variable as a whole */ - switch (dec->decoration) { - case SpvDecorationBinding: - vtn_var->binding = dec->literals[0]; - return; - case SpvDecorationDescriptorSet: - vtn_var->descriptor_set = dec->literals[0]; - return; - - case SpvDecorationLocation: { - unsigned location = dec->literals[0]; - bool is_vertex_input; - if (b->shader->stage == MESA_SHADER_FRAGMENT && - vtn_var->mode == vtn_variable_mode_output) { - is_vertex_input = false; - location += FRAG_RESULT_DATA0; - } else if (b->shader->stage == MESA_SHADER_VERTEX && - vtn_var->mode == vtn_variable_mode_input) { - is_vertex_input = true; - location += VERT_ATTRIB_GENERIC0; - } else if (vtn_var->mode == vtn_variable_mode_input || - vtn_var->mode == vtn_variable_mode_output) { - is_vertex_input = false; - location += VARYING_SLOT_VAR0; - } else { - assert(!"Location must be on input or output variable"); - } - - if (vtn_var->var) { - vtn_var->var->data.location = location; - vtn_var->var->data.explicit_location = true; - } else { - assert(vtn_var->members); - unsigned length = glsl_get_length(vtn_var->type->type); - for (unsigned i = 0; i < length; i++) { - vtn_var->members[i]->data.location = location; - vtn_var->members[i]->data.explicit_location = true; - location += - glsl_count_attribute_slots(vtn_var->members[i]->interface_type, - is_vertex_input); - } - } - return; - } - - default: - break; - } - - /* Now we handle decorations that apply to a particular nir_variable */ - nir_variable *nir_var = vtn_var->var; - if (val->value_type == vtn_value_type_access_chain) { - assert(val->access_chain->length == 0); - assert(val->access_chain->var == void_var); - assert(member == -1); - } else { - assert(val->value_type == vtn_value_type_type); - if (member != -1) - nir_var = vtn_var->members[member]; - } - - if (nir_var == NULL) - return; - - switch (dec->decoration) { - case SpvDecorationRelaxedPrecision: - break; /* FIXME: Do nothing with this for now. 
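To make the Location mapping above concrete (illustrative): OpDecorate %var Location 2 on a vertex-stage input yields data.location == VERT_ATTRIB_GENERIC0 + 2, Location 0 on a fragment output yields FRAG_RESULT_DATA0, and inter-stage varyings land at VARYING_SLOT_VAR0 + n.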
*/ - case SpvDecorationNoPerspective: - nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - break; - case SpvDecorationFlat: - nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; - break; - case SpvDecorationCentroid: - nir_var->data.centroid = true; - break; - case SpvDecorationSample: - nir_var->data.sample = true; - break; - case SpvDecorationInvariant: - nir_var->data.invariant = true; - break; - case SpvDecorationConstant: - assert(nir_var->constant_initializer != NULL); - nir_var->data.read_only = true; - break; - case SpvDecorationNonWritable: - nir_var->data.read_only = true; - break; - case SpvDecorationComponent: - nir_var->data.location_frac = dec->literals[0]; - break; - case SpvDecorationIndex: - nir_var->data.explicit_index = true; - nir_var->data.index = dec->literals[0]; - break; - case SpvDecorationBuiltIn: { - SpvBuiltIn builtin = dec->literals[0]; - - if (builtin == SpvBuiltInWorkgroupSize) { - /* This shouldn't be a builtin. It's actually a constant. */ - nir_var->data.mode = nir_var_global; - nir_var->data.read_only = true; - - nir_constant *c = rzalloc(nir_var, nir_constant); - c->value.u[0] = b->shader->info.cs.local_size[0]; - c->value.u[1] = b->shader->info.cs.local_size[1]; - c->value.u[2] = b->shader->info.cs.local_size[2]; - nir_var->constant_initializer = c; - break; - } - - nir_variable_mode mode = nir_var->data.mode; - vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); - nir_var->data.explicit_location = true; - nir_var->data.mode = mode; - - if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) - nir_var->data.origin_upper_left = b->origin_upper_left; - break; - } - case SpvDecorationRowMajor: - case SpvDecorationColMajor: - case SpvDecorationGLSLShared: - case SpvDecorationPatch: - case SpvDecorationRestrict: - case SpvDecorationAliased: - case SpvDecorationVolatile: - case SpvDecorationCoherent: - case SpvDecorationNonReadable: - case SpvDecorationUniform: - /* This is really nice but we have no use for it right now. */ - case SpvDecorationCPacked: - case SpvDecorationSaturatedConversion: - case SpvDecorationStream: - case SpvDecorationOffset: - case SpvDecorationXfbBuffer: - case SpvDecorationFuncParamAttr: - case SpvDecorationFPRoundingMode: - case SpvDecorationFPFastMathMode: - case SpvDecorationLinkageAttributes: - case SpvDecorationSpecId: - break; - default: - unreachable("Unhandled variable decoration"); - } -} - -/* Tries to compute the size of an interface block based on the strides and - * offsets that are provided to us in the SPIR-V source. - */ -static unsigned -vtn_type_block_size(struct vtn_type *type) -{ - enum glsl_base_type base_type = glsl_get_base_type(type->type); - switch (base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_DOUBLE: { - unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : - glsl_get_matrix_columns(type->type); - if (cols > 1) { - assert(type->stride > 0); - return type->stride * cols; - } else if (base_type == GLSL_TYPE_DOUBLE) { - return glsl_get_vector_elements(type->type) * 8; - } else { - return glsl_get_vector_elements(type->type) * 4; - } - } - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: { - unsigned size = 0; - unsigned num_fields = glsl_get_length(type->type); - for (unsigned f = 0; f < num_fields; f++) { - unsigned field_end = type->offsets[f] + - vtn_type_block_size(type->members[f]); - size = MAX2(size, field_end); - } - return size; - } - - case GLSL_TYPE_ARRAY: - assert(type->stride > 0); - assert(glsl_get_length(type->type) > 0); - return type->stride * glsl_get_length(type->type); - - default: - assert(!"Invalid block type"); - return 0; - } -} - -void -vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) -{ - switch (opcode) { - case SpvOpVariable: { - struct vtn_variable *var = rzalloc(b, struct vtn_variable); - var->type = vtn_value(b, w[1], vtn_value_type_type)->type; - - var->chain.var = var; - var->chain.length = 0; - - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - val->access_chain = &var->chain; - - struct vtn_type *without_array = var->type; - while(glsl_type_is_array(without_array->type)) - without_array = without_array->array_element; - - nir_variable_mode nir_mode; - switch ((SpvStorageClass)w[3]) { - case SpvStorageClassUniform: - case SpvStorageClassUniformConstant: - if (without_array->block) { - var->mode = vtn_variable_mode_ubo; - b->shader->info.num_ubos++; - } else if (without_array->buffer_block) { - var->mode = vtn_variable_mode_ssbo; - b->shader->info.num_ssbos++; - } else if (glsl_type_is_image(without_array->type)) { - var->mode = vtn_variable_mode_image; - nir_mode = nir_var_uniform; - b->shader->info.num_images++; - } else if (glsl_type_is_sampler(without_array->type)) { - var->mode = vtn_variable_mode_sampler; - nir_mode = nir_var_uniform; - b->shader->info.num_textures++; - } else { - assert(!"Invalid uniform variable type"); - } - break; - case SpvStorageClassPushConstant: - var->mode = vtn_variable_mode_push_constant; - assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; - break; - case SpvStorageClassInput: - var->mode = vtn_variable_mode_input; - nir_mode = nir_var_shader_in; - break; - case SpvStorageClassOutput: - var->mode = vtn_variable_mode_output; - nir_mode = nir_var_shader_out; - break; - case SpvStorageClassPrivate: - var->mode = vtn_variable_mode_global; - nir_mode = nir_var_global; - break; - case SpvStorageClassFunction: - var->mode = vtn_variable_mode_local; - nir_mode = nir_var_local; - break; - case SpvStorageClassWorkgroup: - var->mode = vtn_variable_mode_workgroup; - nir_mode = nir_var_shared; - break; - case SpvStorageClassCrossWorkgroup: - case SpvStorageClassGeneric: - case SpvStorageClassAtomicCounter: - default: - unreachable("Unhandled variable storage class"); - } - - switch (var->mode) { - case vtn_variable_mode_local: - case vtn_variable_mode_global: - case vtn_variable_mode_image: - case vtn_variable_mode_sampler: - case vtn_variable_mode_workgroup: - /* For these, we create the variable normally */ - var->var = rzalloc(b->shader, nir_variable); - var->var->name = ralloc_strdup(var->var, val->name); - var->var->type = var->type->type; - var->var->data.mode = nir_mode; - - switch (var->mode) { - case 
vtn_variable_mode_image: - case vtn_variable_mode_sampler: - var->var->interface_type = without_array->type; - break; - default: - var->var->interface_type = NULL; - break; - } - break; - - case vtn_variable_mode_input: - case vtn_variable_mode_output: { - /* For inputs and outputs, we immediately split structures. This - * is for a couple of reasons. For one, builtins may all come in - * a struct and we really want those split out into separate - * variables. For another, interpolation qualifiers can be - * applied to members of the top-level struct and we need to be - * able to preserve that information. - */ - - int array_length = -1; - struct vtn_type *interface_type = var->type; - if (b->shader->stage == MESA_SHADER_GEOMETRY && - glsl_type_is_array(var->type->type)) { - /* In Geometry shaders (and some tessellation), inputs come - * in per-vertex arrays. However, some builtins come in - * non-per-vertex, hence the need for the is_array check. In - * any case, there are no non-builtin arrays allowed so this - * check should be sufficient. - */ - interface_type = var->type->array_element; - array_length = glsl_get_length(var->type->type); - } - - if (glsl_type_is_struct(interface_type->type)) { - /* It's a struct. Split it. */ - unsigned num_members = glsl_get_length(interface_type->type); - var->members = ralloc_array(b, nir_variable *, num_members); - - for (unsigned i = 0; i < num_members; i++) { - const struct glsl_type *mtype = interface_type->members[i]->type; - if (array_length >= 0) - mtype = glsl_array_type(mtype, array_length); - - var->members[i] = rzalloc(b->shader, nir_variable); - var->members[i]->name = - ralloc_asprintf(var->members[i], "%s.%d", val->name, i); - var->members[i]->type = mtype; - var->members[i]->interface_type = - interface_type->members[i]->type; - var->members[i]->data.mode = nir_mode; - } - } else { - var->var = rzalloc(b->shader, nir_variable); - var->var->name = ralloc_strdup(var->var, val->name); - var->var->type = var->type->type; - var->var->interface_type = interface_type->type; - var->var->data.mode = nir_mode; - } - - /* For inputs and outputs, we need to grab locations and builtin - * information from the interface type. - */ - vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); - break; - - case vtn_variable_mode_param: - unreachable("Not created through OpVariable"); - } - - case vtn_variable_mode_ubo: - case vtn_variable_mode_ssbo: - case vtn_variable_mode_push_constant: - /* These don't need actual variables. */ - break; - } - - if (count > 4) { - assert(count == 5); - nir_constant *constant = - vtn_value(b, w[4], vtn_value_type_constant)->constant; - var->var->constant_initializer = - nir_constant_clone(constant, var->var); - } - - vtn_foreach_decoration(b, val, var_decoration_cb, var); - - if (var->mode == vtn_variable_mode_image || - var->mode == vtn_variable_mode_sampler) { - /* XXX: We still need the binding information in the nir_variable - * for these. We should fix that.
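For example (illustrative): a sampler declared with DescriptorSet 1 and Binding 3 comes out of the assignments below with data.descriptor_set == 1 and data.binding == 3, which a driver's pipeline-layout lowering can then map onto hardware binding-table slots.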
- */ - var->var->data.binding = var->binding; - var->var->data.descriptor_set = var->descriptor_set; - - if (var->mode == vtn_variable_mode_image) - var->var->data.image.format = without_array->image_format; - } - - if (var->mode == vtn_variable_mode_local) { - assert(var->members == NULL && var->var != NULL); - nir_function_impl_add_variable(b->impl, var->var); - } else if (var->var) { - nir_shader_add_variable(b->shader, var->var); - } else if (var->members) { - unsigned count = glsl_get_length(without_array->type); - for (unsigned i = 0; i < count; i++) { - assert(var->members[i]->data.mode != nir_var_local); - nir_shader_add_variable(b->shader, var->members[i]); - } - } else { - assert(var->mode == vtn_variable_mode_ubo || - var->mode == vtn_variable_mode_ssbo || - var->mode == vtn_variable_mode_push_constant); - } - break; - } - - case SpvOpAccessChain: - case SpvOpInBoundsAccessChain: { - struct vtn_access_chain *base, *chain; - struct vtn_value *base_val = vtn_untyped_value(b, w[3]); - if (base_val->value_type == vtn_value_type_sampled_image) { - /* This is rather insane. SPIR-V allows you to use OpSampledImage - * to combine an array of images with a single sampler to get an - * array of sampled images that all share the same sampler. - * Fortunately, this means that we can more-or-less ignore the - * sampler when crawling the access chain, but it does leave us - * with this rather awkward little special-case. - */ - base = base_val->sampled_image->image; - } else { - assert(base_val->value_type == vtn_value_type_access_chain); - base = base_val->access_chain; - } - - chain = vtn_access_chain_extend(b, base, count - 4); - - unsigned idx = base->length; - for (int i = 4; i < count; i++) { - struct vtn_value *link_val = vtn_untyped_value(b, w[i]); - if (link_val->value_type == vtn_value_type_constant) { - chain->link[idx].mode = vtn_access_mode_literal; - chain->link[idx].id = link_val->constant->value.u[0]; - } else { - chain->link[idx].mode = vtn_access_mode_id; - chain->link[idx].id = w[i]; - } - idx++; - } - - if (base_val->value_type == vtn_value_type_sampled_image) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_sampled_image); - val->sampled_image = ralloc(b, struct vtn_sampled_image); - val->sampled_image->image = chain; - val->sampled_image->sampler = base_val->sampled_image->sampler; - } else { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_access_chain); - val->access_chain = chain; - } - break; - } - - case SpvOpCopyMemory: { - struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); - struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); - - vtn_variable_copy(b, dest->access_chain, src->access_chain); - break; - } - - case SpvOpLoad: { - struct vtn_access_chain *src = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - - if (src->var->mode == vtn_variable_mode_image || - src->var->mode == vtn_variable_mode_sampler) { - vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; - return; - } - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_variable_load(b, src); - break; - } - - case SpvOpStore: { - struct vtn_access_chain *dest = - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; - struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); - vtn_variable_store(b, src, dest); - break; - } - - case SpvOpArrayLength: { - struct vtn_access_chain *chain = - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; - - 
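   /* Worked example (illustrative numbers): for a runtime array at
    * Offset 16 with ArrayStride 4 in a 148-byte SSBO, the sequence
    * built below computes max(148 - 16, 0) / 4 == 33 elements. */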
const uint32_t offset = chain->var->type->offsets[w[4]]; - const uint32_t stride = chain->var->type->members[w[4]]->stride; - - unsigned chain_idx; - struct vtn_type *type; - nir_ssa_def *index = - get_vulkan_resource_index(b, chain, &type, &chain_idx); - - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(b->nb.shader, - nir_intrinsic_get_buffer_size); - instr->src[0] = nir_src_for_ssa(index); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); - nir_builder_instr_insert(&b->nb, &instr->instr); - nir_ssa_def *buf_size = &instr->dest.ssa; - - /* array_length = max(buffer_size - offset, 0) / stride */ - nir_ssa_def *array_length = - nir_idiv(&b->nb, - nir_imax(&b->nb, - nir_isub(&b->nb, - buf_size, - nir_imm_int(&b->nb, offset)), - nir_imm_int(&b->nb, 0u)), - nir_imm_int(&b->nb, stride)); - - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); - val->ssa->def = array_length; - break; - } - - case SpvOpCopyMemorySized: - default: - unreachable("Unhandled opcode"); - } -} diff --git a/src/compiler/spirv/GLSL.std.450.h b/src/compiler/spirv/GLSL.std.450.h new file mode 100644 index 00000000000..d1c9b5c1d44 --- /dev/null +++ b/src/compiler/spirv/GLSL.std.450.h @@ -0,0 +1,127 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +const int GLSLstd450Version = 99; +const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h new file mode 100644 index 00000000000..500f2cb94df --- /dev/null +++ b/src/compiler/spirv/nir_spirv.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir/nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nir_spirv_specialization { + uint32_t id; + uint32_t data; +}; + +nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *specializations, + unsigned num_specializations, + gl_shader_stage stage, const char *entry_point_name, + const nir_shader_compiler_options *options); + +#ifdef __cplusplus +} +#endif + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/compiler/spirv/spirv.h b/src/compiler/spirv/spirv.h new file mode 100644 index 00000000000..63bcb2f88dd --- /dev/null +++ b/src/compiler/spirv/spirv.h @@ -0,0 +1,870 @@ +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
+*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 2 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 2; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + 
SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + 
SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + 
SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + 
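(A brief illustrative aside, not part of the patch: the preamble of spirv.h above notes that mask-like tokens can be OR'd together and that each "...Mask" enum has a parallel "...Shift" enum holding the bit position, so each mask equals 1 << its shift; SpvOpCodeMask and SpvWordCountShift likewise describe how an instruction's first word packs its opcode and word count. A minimal C sketch of both conventions, using only definitions from this header:

   /* The first word of every SPIR-V instruction packs the opcode into the
    * low 16 bits and the instruction's total word count into the high 16. */
   static void decode_first_word(const unsigned int *words)
   {
      SpvOp opcode = (SpvOp)(words[0] & SpvOpCodeMask);
      unsigned word_count = words[0] >> SpvWordCountShift;
      (void)opcode;
      (void)word_count;
   }

   /* Mask-style tokens combine by OR; each mask is 1 << its shift value,
    * e.g. SpvImageOperandsBiasMask == 1u << SpvImageOperandsBiasShift. */
   static const SpvImageOperandsMask bias_and_lod =
      SpvImageOperandsBiasMask | SpvImageOperandsLodMask;

vtn_foreach_instruction in spirv_to_nir.c, added later in this patch, walks the word stream with exactly this opcode/word-count decoding.)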
+typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + 
SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + 
SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, 
+ SpvOpAtomicFlagClear = 319, +} SpvOp; + +#endif // #ifndef spirv_H + diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c new file mode 100644 index 00000000000..99514b49650 --- /dev/null +++ b/src/compiler/spirv/spirv_to_nir.c @@ -0,0 +1,2710 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" +#include "nir/nir_control_flow.h" +#include "nir/nir_constant_expressions.h" + +static struct vtn_ssa_value * +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(val->type)); + val->def = nir_ssa_undef(&b->nb, num_components, bit_size); + } else { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_matrix(type)) { + const struct glsl_type *elem_type = + glsl_vector_type(glsl_get_base_type(type), + glsl_get_vector_elements(type)); + + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else if (glsl_type_is_array(type)) { + const struct glsl_type *elem_type = glsl_get_array_element(type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } else { + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = glsl_get_struct_field(type, i); + val->elems[i] = vtn_undef_ssa_value(b, elem_type); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, 
num_components, 32); + + for (unsigned i = 0; i < num_components; i++) + load->value.u32[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows, 32); + + for (unsigned j = 0; j < rows; j++) + load->value.u32[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_undef: + return vtn_undef_ssa_value(b, val->type->type); + + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + + case vtn_value_type_access_chain: + /* This is needed for function parameters */ + return vtn_variable_load(b, val->access_chain); + + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count, unsigned *words_used) +{ + char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); + if (words_used) { + /* Amount of space taken by the string (including the null) */ + unsigned len = strlen(dup) + 1; + *words_used = DIV_ROUND_UP(len, sizeof(*words)); + } + return dup; +} + +const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + b->file = NULL; + b->line = -1; + b->col = -1; + + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + switch (opcode) { + case SpvOpNop: + break; /* Do nothing */ + + case SpvOpLine: + b->file = vtn_value(b, w[1], vtn_value_type_string)->str; + b->line = w[2]; + b->col = w[3]; + break; + + case SpvOpNoLine: + b->file = NULL; + b->line = -1; + b->col = -1; + break; + + default: + if (!handler(b, opcode, w, count)) + return w; + break; + } + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t
*w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int parent_member, + struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + int member; + if (dec->scope == VTN_DEC_DECORATION) { + member = parent_member; + } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { + assert(parent_member == -1); + member = dec->scope - VTN_DEC_STRUCT_MEMBER0; + } else { + /* Not a decoration */ + continue; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, member, dec->group, + cb, data); + } else { + cb(b, base_value, member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. + */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +void +vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->scope != VTN_DEC_EXECUTION_MODE) + continue; + + assert(dec->group == NULL); + cb(b, value, dec, data); + } +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_value(b, target, vtn_value_type_decoration_group); + break; + + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpExecutionMode: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + switch (opcode) { + case SpvOpDecorate: + dec->scope = VTN_DEC_DECORATION; + break; + case SpvOpMemberDecorate: + dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); + break; + case SpvOpExecutionMode: + dec->scope = VTN_DEC_EXECUTION_MODE; + break; + default: + unreachable("Invalid decoration opcode"); + } + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + case SpvOpGroupDecorate: { + struct vtn_value *group = + vtn_value(b, target, vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = vtn_untyped_value(b, *w); + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + + dec->group = group; + if (opcode == SpvOpGroupDecorate) { + dec->scope = VTN_DEC_DECORATION; + } else { + dec->scope = 
VTN_DEC_STRUCT_MEMBER0 + *(++w); + } + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + unsigned num_fields; + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_ARRAY: + dest->row_major = src->row_major; + dest->stride = src->stride; + dest->array_element = src->array_element; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static struct vtn_type * +mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member) +{ + type->members[member] = vtn_type_copy(b, type->members[member]); + type = type->members[member]; + + /* We may have an array of matrices.... Oh, joy! */ + while (glsl_type_is_array(type->type)) { + type->array_element = vtn_type_copy(b, type->array_element); + type = type->array_element; + } + + assert(glsl_type_is_matrix(type->type)); + + return type; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + assert(member < ctx->num_fields); + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. */ + case SpvDecorationNoPerspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + case SpvDecorationMatrixStride: + mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; + break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. 
*/ + case SpvDecorationRowMajor: + mutable_matrix_member(b, ctx->type, member)->row_major = true; + break; + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned
count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + val->type->val = val; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: { + const bool signedness = w[3]; + val->type->type = (signedness ? glsl_int_type() : glsl_uint_type()); + break; + } + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base->type)); + val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); + + /* Vectors implicitly have sizeof(base_type) stride. For now, this + * is always 4 bytes. This will have to change if we want to start + * supporting doubles or half-floats. + */ + val->type->stride = 4; + val->type->array_element = base; + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + assert(!glsl_type_is_error(val->type->type)); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeRuntimeArray: + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + + unsigned length; + if (opcode == SpvOpTypeRuntimeArray) { + /* A length of 0 is used to denote unsized arrays */ + length = 0; + } else { + length = + vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; + } + + val->type->type = glsl_array_type(array_element->type, length); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i] = (struct glsl_struct_field) { + .type = val->type->members[i]->type, + .name = ralloc_asprintf(b, "field%d", i), + .location = -1, + }; + } + + struct member_decoration_ctx ctx = { + .num_fields = num_fields, + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. 
The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + if (count > 9) + val->type->access_qualifier = w[9]; + else + val->type->access_qualifier = SpvAccessQualifierReadWrite; + + if (multisampled) { + assert(dim == GLSL_SAMPLER_DIM_2D); + dim = GLSL_SAMPLER_DIM_MS; + } + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + glsl_get_base_type(sampled_type)); + } else { + assert(!"We need to know if the image will be sampled"); + } + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. + */ + val->type->type = glsl_bare_sampler_type(); + break; + + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static nir_constant * +vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) +{ + nir_constant *c = rzalloc(b, nir_constant); + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* Nothing to do here. 
It's already initialized to zero */ + break; + + case GLSL_TYPE_ARRAY: + assert(glsl_get_length(type) > 0); + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); + for (unsigned i = 1; i < c->num_elements; i++) + c->elements[i] = c->elements[0]; + break; + + case GLSL_TYPE_STRUCT: + c->num_elements = glsl_get_length(type); + c->elements = ralloc_array(b, nir_constant *, c->num_elements); + + for (unsigned i = 0; i < c->num_elements; i++) { + c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); + } + break; + + default: + unreachable("Invalid type for null constant"); + } + + return c; +} + +static void +spec_constant_decoration_cb(struct vtn_builder *b, struct vtn_value *v, + int member, const struct vtn_decoration *dec, + void *data) +{ + assert(member == -1); + if (dec->decoration != SpvDecorationSpecId) + return; + + uint32_t *const_value = data; + + for (unsigned i = 0; i < b->num_specializations; i++) { + if (b->specializations[i].id == dec->literals[0]) { + *const_value = b->specializations[i].data; + return; + } + } +} + +static uint32_t +get_specialization(struct vtn_builder *b, struct vtn_value *val, + uint32_t const_value) +{ + vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &const_value); + return const_value; +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = rzalloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: { + assert(val->const_type == glsl_bool_type()); + uint32_t int_val = + get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); + val->constant->value.u[0] = int_val ?
NIR_TRUE : NIR_FALSE; + break; + } + + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpSpecConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = get_specialization(b, val, w[3]); + break; + case SpvOpSpecConstantComposite: + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->num_elements = elem_count; + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + case SpvOpSpecConstantOp: { + SpvOp opcode = get_specialization(b, val, w[3]); + switch (opcode) { + case SpvOpVectorShuffle: { + struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); + struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); + unsigned len0 = glsl_get_vector_elements(v0->const_type); + unsigned len1 = glsl_get_vector_elements(v1->const_type); + + uint32_t u[8]; + for (unsigned i = 0; i < len0; i++) + u[i] = v0->constant->value.u[i]; + for (unsigned i = 0; i < len1; i++) + u[len0 + i] = v1->constant->value.u[i]; + + for (unsigned i = 0; i < count - 6; i++) { + uint32_t comp = w[i + 6]; + if (comp == (uint32_t)-1) { + val->constant->value.u[i] = 0xdeadbeef; + } else { + val->constant->value.u[i] = u[comp]; + } + } + return; + } + + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: { + struct vtn_value *comp; + unsigned deref_start; + struct nir_constant **c; + if (opcode == SpvOpCompositeExtract) { + comp = vtn_value(b, w[4], vtn_value_type_constant); + deref_start = 5; + c = &comp->constant; + } else { + comp = vtn_value(b, w[5], vtn_value_type_constant); + deref_start = 6; + val->constant = nir_constant_clone(comp->constant, + (nir_variable *)b); + c = &val->constant; + } + + int elem = -1; + const struct glsl_type *type = comp->const_type; + for (unsigned i = deref_start; i < count; i++) { + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* If we hit this granularity, we're picking off an element */ + if (elem < 0) + elem = 0; + + if (glsl_type_is_matrix(type)) { + elem += w[i] * glsl_get_vector_elements(type); + type = glsl_get_column_type(type); + } else { + assert(glsl_type_is_vector(type)); + elem += w[i]; + type = glsl_scalar_type(glsl_get_base_type(type)); + } + continue; + + case GLSL_TYPE_ARRAY: + c = &(*c)->elements[w[i]]; + type = glsl_get_array_element(type); + continue; + + case GLSL_TYPE_STRUCT: + c = 
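+
+   /* For reference, the column-major packing used by the matrix case above,
+    * written out for a hypothetical mat2 (value.u[rows * i + j]):
+    *
+    *    value.u[0] = col0.x;   // i = 0, j = 0
+    *    value.u[1] = col0.y;   // i = 0, j = 1
+    *    value.u[2] = col1.x;   // i = 1, j = 0
+    *    value.u[3] = col1.y;   // i = 1, j = 1
+    */
+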
&(*c)->elements[w[i]]; + type = glsl_get_struct_field(type, w[i]); + continue; + + default: + unreachable("Invalid constant type"); + } + } + + if (opcode == SpvOpCompositeExtract) { + if (elem == -1) { + val->constant = *c; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + val->constant->value.u[i] = (*c)->value.u[elem + i]; + } + } else { + struct vtn_value *insert = + vtn_value(b, w[4], vtn_value_type_constant); + assert(insert->const_type == type); + if (elem == -1) { + *c = insert->constant; + } else { + unsigned num_components = glsl_get_vector_elements(type); + for (unsigned i = 0; i < num_components; i++) + (*c)->value.u[elem + i] = insert->constant->value.u[i]; + } + } + return; + } + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + unsigned num_components = glsl_get_vector_elements(val->const_type); + unsigned bit_size = + glsl_get_bit_size(glsl_get_base_type(val->const_type)); + + nir_const_value src[3]; + assert(count <= 7); + for (unsigned i = 0; i < count - 4; i++) { + nir_constant *c = + vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; + + unsigned j = swap ? 1 - i : i; + assert(bit_size == 32); + for (unsigned k = 0; k < num_components; k++) + src[j].u32[k] = c->value.u[k]; + } + + nir_const_value res = nir_eval_const_opcode(op, num_components, + bit_size, src); + + for (unsigned k = 0; k < num_components; k++) + val->constant->value.u[k] = res.u32[k]; + + return; + } /* default */ + } + } + + case SpvOpConstantNull: + val->constant = vtn_null_constant(b, val->const_type); + break; + + case SpvOpConstantSampler: + assert(!"OpConstantSampler requires Kernel Capability"); + break; + + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct nir_function *callee = + vtn_value(b, w[3], vtn_value_type_function)->func->impl->function; + + nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); + for (unsigned i = 0; i < call->num_params; i++) { + unsigned arg_id = w[4 + i]; + struct vtn_value *arg = vtn_untyped_value(b, arg_id); + if (arg->value_type == vtn_value_type_access_chain) { + nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); + call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); + } else { + struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); + + /* Make a temporary to store the argument in */ + nir_variable *tmp = + nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); + call->params[i] = nir_deref_var_create(call, tmp); + + vtn_local_store(b, arg_ssa, call->params[i]); + } + } + + nir_variable *out_tmp = NULL; + if (!glsl_type_is_void(callee->return_type)) { + out_tmp = nir_local_variable_create(b->impl, callee->return_type, + "out_tmp"); + call->return_deref = nir_deref_var_create(call, out_tmp); + } + + nir_builder_instr_insert(&b->nb, &call->instr); + + if (glsl_type_is_void(callee->return_type)) { + vtn_push_value(b, w[2], vtn_value_type_undef); + } else { + struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); + retval->ssa = vtn_local_load(b, call->return_deref); + } +} + +struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + 
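+
+   /* Roughly what vtn_handle_function_call above emits, sketched at the
+    * GLSL level (the callee and names are hypothetical): every parameter is
+    * passed by reference, so SSA arguments get spilled to a local first.
+    *
+    *    float arg_tmp = <ssa arg>;   // vtn_local_store into a fresh local
+    *    float out_tmp;               // only if the return type is non-void
+    *    foo(&arg_tmp, &out_tmp);     // nir_call_instr with deref params
+    *    <result> = out_tmp;          // vtn_local_load of return_deref
+    */
+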
val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unknown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + val->sampled_image->sampler = + vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; + return; + } else if (opcode == SpvOpImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + struct vtn_value *src_val = vtn_untyped_value(b, w[3]); + if (src_val->value_type == vtn_value_type_sampled_image) { + val->access_chain = src_val->sampled_image->image; + } else { + assert(src_val->value_type == vtn_value_type_access_chain); + val->access_chain = src_val->access_chain; + } + return; + } + + struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_access_chain); + sampled.image = NULL; + sampled.sampler = sampled_val->access_chain; + } + + const struct glsl_type *image_type; + if (sampled.image) { + image_type = sampled.image->var->var->interface_type; + } else { + image_type = sampled.sampler->var->var->interface_type; + } + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + bool has_coord = false; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + has_coord = true; + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + /* These all have an explicit depth value as their next source */ + switch (opcode) { + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case 
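+
+   /* An illustration of the recursive vtn_ssa_value layout built above, for
+    * a hypothetical struct { vec4 a; float b[2]; }: only the leaves carry
+    * NIR defs, interior nodes just hold elems arrays.
+    *
+    *    val->def                         // unused: not a vector or scalar
+    *    val->elems[0]->def               // the vec4 a
+    *    val->elems[1]->elems[0]->def     // b[0]
+    *    val->elems[1]->elems[1]->def     // b[1]
+    */
+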
SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); + break; + default: + break; + } + + /* For OpImageQuerySizeLod, we always have an LOD */ + if (opcode == SpvOpImageQuerySizeLod) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + texop = nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + + case SpvOpImageFetch: + if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) { + texop = nir_texop_txf_ms; + } else { + texop = nir_texop_txf; + } + break; + + case SpvOpImageGather: + case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + texop = nir_texop_txs; + break; + + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* Now we need to handle some number of optional arguments */ + if (idx < count) { + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txf_ms || texop == nir_texop_txs); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + + if (operands & SpvImageOperandsOffsetMask || + operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf_ms); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + instr->op = texop; + + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + + instr->sampler_dim = glsl_get_sampler_dim(image_type); + instr->is_array = glsl_sampler_type_is_array(image_type); + instr->is_shadow = glsl_sampler_type_is_shadow(image_type); + instr->is_new_style_shadow = instr->is_shadow; + + if (has_coord) { + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + instr->coord_components = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_MS: + instr->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + instr->coord_components = 3; + break; + default: + unreachable("Invalid sampler type"); + } + + if (instr->is_array) + instr->coord_components++; + } else { + instr->coord_components = 0; + } + + switch 
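+
+   /* A sketch of the optional-operand decoding above (ids are illustrative):
+    * for OpImageSampleExplicitLod with Lod and ConstOffset set, the trailing
+    * words are consumed in ascending bit order, which is why idx must land
+    * exactly on count afterwards.
+    *
+    *    w[idx]     = SpvImageOperandsLodMask | SpvImageOperandsConstOffsetMask;
+    *    w[idx + 1] = <lod id>;      // Lod bit handled first
+    *    w[idx + 2] = <offset id>;   // then the offset bits
+    */
+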
(glsl_get_sampler_result_type(image_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); + if (sampled.image) { + nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); + instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref)); + } else { + instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + } + + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + /* These operations require a sampler */ + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + break; + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_tg4: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + /* These don't */ + instr->sampler = NULL; + break; + } + + nir_ssa_dest_init(&instr->instr, &instr->dest, + nir_tex_instr_dest_size(instr), 32, NULL); + + assert(glsl_get_vector_elements(ret_type->type) == + nir_tex_instr_dest_size(instr)); + + val->ssa = vtn_create_ssa_value(b, ret_type->type); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch (opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageQuerySize: + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + image.coord = NULL; + image.sample = NULL; + break; + + case SpvOpImageRead: + image.image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1, 32); + } + break; + 
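+
+   /* get_image_coord above pads every coordinate to four components by
+    * repeating the last valid one; for a 2D coordinate (x, y):
+    *
+    *    dim = 2, swizzle = { 0, 1, 1, 1 }   ->   (x, y, y, y)
+    *
+    * which satisfies the 4-wide source the image intrinsics assume.
+    */
+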
+ case SpvOpImageWrite: + image.image = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1, 32); + } + break; + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageQuerySize, size) + OP(ImageRead, load) + OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + + nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); + + /* ImageQuerySize doesn't take any extra parameters */ + if (opcode != SpvOpImageQuerySize) { + /* The image coordinate is always 4 components but we may not have that + * many. Swizzle to compensate. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < image.coord->num_components ? i : 0; + intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, + swiz, 4, false)); + intrin->src[1] = nir_src_for_ssa(image.sample); + } + + switch (opcode) { + case SpvOpImageQuerySize: + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL); + + nir_builder_instr_insert(&b->nb, &intrin->instr); + + /* The image intrinsics always return 4 channels but we may not want + * that many. Emit a mov to trim it down. 
+ */ + unsigned swiz[4] = {0, 1, 2, 3}; + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, + glsl_get_vector_elements(type->type), false); + } else { + nir_builder_instr_insert(&b->nb, &intrin->instr); + } +} + +static nir_intrinsic_op +get_ssbo_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid SSBO atomic"); + } +} + +static nir_intrinsic_op +get_shared_nir_atomic_op(SpvOp opcode) +{ + switch (opcode) { +#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_imin) + OP(AtomicUMin, atomic_umin) + OP(AtomicSMax, atomic_imax) + OP(AtomicUMax, atomic_umax) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid shared atomic"); + } +} + +static void +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, nir_src *src) +{ + switch (opcode) { + case SpvOpAtomicIIncrement: + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + + case SpvOpAtomicIDecrement: + src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicISub: + src[0] = + nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + case SpvOpAtomicCompareExchange: + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + default: + unreachable("Invalid SPIR-V atomic"); + } +} + +static void +vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + nir_intrinsic_instr *atomic; + + /* + SpvScope scope = w[4]; + SpvMemorySemanticsMask semantics = w[5]; + */ + + if (chain->var->mode == vtn_variable_mode_workgroup) { + nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; + nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); + fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); + } else { + assert(chain->var->mode == vtn_variable_mode_ssbo); + struct vtn_type *type; + nir_ssa_def *offset, *index; + offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); + + nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); + + atomic = nir_intrinsic_instr_create(b->nb.shader, op); + atomic->src[0] = 
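+
+   /* Note how the tables above fold several SPIR-V atomics onto one NIR add;
+    * fill_common_atomic_sources supplies the operand (sketch):
+    *
+    *    OpAtomicIIncrement    ->  atomic_add(ptr, +1)
+    *    OpAtomicIDecrement    ->  atomic_add(ptr, -1)
+    *    OpAtomicISub data     ->  atomic_add(ptr, -data)   // via nir_ineg
+    */
+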
nir_src_for_ssa(index); + atomic->src[1] = nir_src_for_ssa(offset); + fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); + } + + nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->def = &atomic->dest.ssa; + val->ssa->type = type->type; + + nir_builder_instr_insert(&b->nb, &atomic->instr); +} + +static nir_alu_instr * +create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(shader, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, + bit_size, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +struct vtn_ssa_value * +vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b->shader, + glsl_get_matrix_columns(src->type), + glsl_get_bit_size(glsl_get_base_type(src->type))); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + +nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components, + src->bit_size); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + 
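+
+   /* The dynamic extract/insert helpers above unroll to a select chain,
+    * since NIR has no dynamically indexed vector operations. For a vec3
+    * source this is effectively
+    *
+    *    dest = src.x;
+    *    dest = bcsel(index == 1, src.y, dest);
+    *    dest = bcsel(index == 2, src.z, dest);
+    */
+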
nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = + nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size)); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatenates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components, + srcs[0]->bit_size); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index at which to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * component to extract from the vector. 
+ */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } else { + cur = cur->elems[indices[i]]; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); + } +} + +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + intrinsic_op = nir_intrinsic_barrier; + break; + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + nir_intrinsic_set_stream_id(intrin, w[1]); + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static unsigned +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINES_ADJACENCY_ARB */ + case SpvExecutionModeTriangles: + return 4; /* GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeIsolines: + 
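+
+   /* The literal values in this switch are raw GL primitive enums, kept as
+    * numbers so the SPIR-V front-end stays free of GL headers. A few of the
+    * mappings spelled out (enum values per the GL spec):
+    *
+    *    0 = GL_POINTS,  3 = GL_LINE_STRIP,  4 = GL_TRIANGLES,
+    *    5 = GL_TRIANGLE_STRIP,  7 = GL_QUADS
+    */
+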
return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; + } +} + +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case SpvExecutionModeTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + +static gl_shader_stage +stage_for_execution_model(SpvExecutionModel model) +{ + switch (model) { + case SpvExecutionModelVertex: + return MESA_SHADER_VERTEX; + case SpvExecutionModelTessellationControl: + return MESA_SHADER_TESS_CTRL; + case SpvExecutionModelTessellationEvaluation: + return MESA_SHADER_TESS_EVAL; + case SpvExecutionModelGeometry: + return MESA_SHADER_GEOMETRY; + case SpvExecutionModelFragment: + return MESA_SHADER_FRAGMENT; + case SpvExecutionModelGLCompute: + return MESA_SHADER_COMPUTE; + default: + unreachable("Unsupported execution model"); + } +} + +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpSourceContinued: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok. */ + break; + + case SpvOpCapability: { + SpvCapability cap = w[1]; + switch (cap) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + case SpvCapabilityGeometry: + case SpvCapabilityTessellationPointSize: + case SpvCapabilityGeometryPointSize: + case SpvCapabilityUniformBufferArrayDynamicIndexing: + case SpvCapabilitySampledImageArrayDynamicIndexing: + case SpvCapabilityStorageBufferArrayDynamicIndexing: + case SpvCapabilityStorageImageArrayDynamicIndexing: + case SpvCapabilityImageRect: + case SpvCapabilitySampledRect: + case SpvCapabilitySampled1D: + case SpvCapabilityImage1D: + case SpvCapabilitySampledCubeArray: + case SpvCapabilitySampledBuffer: + case SpvCapabilityImageBuffer: + case SpvCapabilityImageQuery: + break; + case SpvCapabilityClipDistance: + case SpvCapabilityCullDistance: + case SpvCapabilityGeometryStreams: + fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n"); + break; + default: + assert(!"Unsupported capability"); + } + break; + } + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: { + struct vtn_value *entry_point = &b->values[w[2]]; + /* Let this be a name label regardless */ + unsigned name_words; + entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); + + if (strcmp(entry_point->name, b->entry_point_name) != 0 || + stage_for_execution_model(w[1]) != b->entry_point_stage) + break; + + assert(b->entry_point == NULL); + b->entry_point = entry_point; + break; + } + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2, NULL); + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpExecutionMode: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case 
SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static void +vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, + const struct vtn_decoration *mode, void *data) +{ + assert(b->entry_point == entry_point); + + switch (mode->exec_mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = + (mode->exec_mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); + break; + + case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + b->shader->info.cs.local_size[0] = mode->literals[0]; + b->shader->info.cs.local_size[1] = mode->literals[1]; + b->shader->info.cs.local_size[2] = mode->literals[2]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing to do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.vertices_out = mode->literals[0]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode->exec_mode); + } else { + assert(!"Tessellation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode->exec_mode); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + break; /* OpenCL */ + } +} + +static bool +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceContinued: + case 
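+
+   /* A worked example of the execution-mode handling above: a compute
+    * shader declaring "OpExecutionMode %main LocalSize 8 8 1" reaches that
+    * callback with mode->literals = { 8, 8, 1 } and simply becomes
+    *
+    *    b->shader->info.cs.local_size[0] = 8;
+    *    b->shader->info.cs.local_size[1] = 8;
+    *    b->shader->info.cs.local_size[2] = 1;
+    */
+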
SpvOpSourceExtension: + case SpvOpExtension: + case SpvOpCapability: + case SpvOpExtInstImport: + case SpvOpMemoryModel: + case SpvOpEntryPoint: + case SpvOpExecutionMode: + case SpvOpString: + case SpvOpName: + case SpvOpMemberName: + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + assert(!"Invalid opcode in types and variables section"); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpConstantNull: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + case SpvOpSpecConstantOp: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: + break; + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + break; + } + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpSampledImage: + case SpvOpImage: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + case SpvOpImageQuerySize: { + struct vtn_access_chain *image = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + if (glsl_type_is_image(image->var->var->interface_type)) { + vtn_handle_image(b, opcode, w, count); + } else { + vtn_handle_texture(b, opcode, w, count); + } + break; + } + + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: 
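+
+   /* All of these per-section handlers walk the same word stream. Each
+    * SPIR-V instruction is self-delimiting, so vtn_foreach_instruction can
+    * slice it without knowing the opcode (layout per the SPIR-V spec):
+    *
+    *    w[0] = (word_count << 16) | opcode;
+    *    w[1] ... w[word_count - 1] = operands;
+    *
+    * A handler returning false ends the current section of the module.
+    */
+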
+ case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(pointer->value_type == vtn_value_type_access_chain); + vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); + } + break; + } + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpQuantizeToF16: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpIAddCarry: + case SpvOpISubBorrow: + case SpvOpUMulExtended: + case SpvOpSMulExtended: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalOr: + case SpvOpLogicalAnd: + case SpvOpLogicalNot: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + case SpvOpBitFieldInsert: + case SpvOpBitFieldSExtract: + case SpvOpBitFieldUExtract: + case SpvOpBitReverse: + case SpvOpBitCount: + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + + default: + unreachable("Unhandled 
opcode"); + } + + return true; +} + +nir_function * +spirv_to_nir(const uint32_t *words, size_t word_count, + struct nir_spirv_specialization *spec, unsigned num_spec, + gl_shader_stage stage, const char *entry_point_name, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 5 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] >= 0x10000); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words += 5; + + /* Initialize the vtn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + b->entry_point_stage = stage; + b->entry_point_name = entry_point_name; + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + if (b->entry_point == NULL) { + assert(!"Entry point not found"); + ralloc_free(b); + return NULL; + } + + b->shader = nir_shader_create(NULL, stage, options); + + /* Set shader info defaults */ + b->shader->info.gs.invocations = 1; + + /* Parse execution modes */ + vtn_foreach_execution_mode(b, b->entry_point, + vtn_handle_execution_mode, NULL); + + b->specializations = spec; + b->num_specializations = num_spec; + + /* Handle all variable, type, and constant instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_variable_or_type_instruction); + + vtn_build_cfg(b, words, word_end); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = func->impl; + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + vtn_function_emit(b, func, vtn_handle_body_instruction); + } + + assert(b->entry_point->value_type == vtn_value_type_function); + nir_function *entry_point = b->entry_point->func->impl->function; + assert(entry_point); + + ralloc_free(b); + + return entry_point; +} diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c new file mode 100644 index 00000000000..8b9a63ce760 --- /dev/null +++ b/src/compiler/spirv/vtn_alu.c @@ -0,0 +1,464 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vtn_private.h" + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + const struct glsl_type *dest_type; + if (src1_columns > 1) { + dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns); + } else { + dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); + } + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); + + dest = wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_ssa_def *vec_src[4]; + for (unsigned j = 0; j < src0_rows; j++) { + vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def); + } + dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + nir_channel(&b->nb, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + nir_channel(&b->nb, src1->elems[i]->def, j))); + } + } + } + + dest = unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_ssa_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + struct vtn_value *dest, + struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) +{ + switch (opcode) { + case SpvOpFNegate: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); + break; + } + + case SpvOpFAdd: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + + case SpvOpFSub: { + dest->ssa = vtn_create_ssa_value(b, src0->type); + unsigned cols = glsl_get_matrix_columns(src0->type); + for (unsigned i = 0; i < cols; i++) + dest->ssa->elems[i]->def = + nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); + break; + } + + case SpvOpTranspose: + dest->ssa = vtn_ssa_transpose(b, src0); + break; + + case SpvOpMatrixTimesScalar: + if (src0->transposed) { + dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, + src1->def)); + } else { + dest->ssa = mat_times_scalar(b, src0, src1->def); + } + break; + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + if (opcode == SpvOpVectorTimesMatrix) { + dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); + } else { + dest->ssa = matrix_multiply(b, src0, src1); + } + break; + + default: unreachable("unknown matrix opcode"); + } +} + +nir_op +vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) +{ + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */
+ *swap = false;
+
+ switch (opcode) {
+ case SpvOpSNegate: return nir_op_ineg;
+ case SpvOpFNegate: return nir_op_fneg;
+ case SpvOpNot: return nir_op_inot;
+ case SpvOpIAdd: return nir_op_iadd;
+ case SpvOpFAdd: return nir_op_fadd;
+ case SpvOpISub: return nir_op_isub;
+ case SpvOpFSub: return nir_op_fsub;
+ case SpvOpIMul: return nir_op_imul;
+ case SpvOpFMul: return nir_op_fmul;
+ case SpvOpUDiv: return nir_op_udiv;
+ case SpvOpSDiv: return nir_op_idiv;
+ case SpvOpFDiv: return nir_op_fdiv;
+ case SpvOpUMod: return nir_op_umod;
+ case SpvOpSMod: return nir_op_imod;
+ case SpvOpFMod: return nir_op_fmod;
+ case SpvOpSRem: return nir_op_irem;
+ case SpvOpFRem: return nir_op_frem;
+
+ case SpvOpShiftRightLogical: return nir_op_ushr;
+ case SpvOpShiftRightArithmetic: return nir_op_ishr;
+ case SpvOpShiftLeftLogical: return nir_op_ishl;
+ case SpvOpLogicalOr: return nir_op_ior;
+ case SpvOpLogicalEqual: return nir_op_ieq;
+ case SpvOpLogicalNotEqual: return nir_op_ine;
+ case SpvOpLogicalAnd: return nir_op_iand;
+ case SpvOpLogicalNot: return nir_op_inot;
+ case SpvOpBitwiseOr: return nir_op_ior;
+ case SpvOpBitwiseXor: return nir_op_ixor;
+ case SpvOpBitwiseAnd: return nir_op_iand;
+ case SpvOpSelect: return nir_op_bcsel;
+ case SpvOpIEqual: return nir_op_ieq;
+
+ case SpvOpBitFieldInsert: return nir_op_bitfield_insert;
+ case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract;
+ case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract;
+ case SpvOpBitReverse: return nir_op_bitfield_reverse;
+ case SpvOpBitCount: return nir_op_bit_count;
+
+ /* Comparisons: (TODO: How do we want to handle ordered/unordered?) */
+ case SpvOpFOrdEqual: return nir_op_feq;
+ case SpvOpFUnordEqual: return nir_op_feq;
+ case SpvOpINotEqual: return nir_op_ine;
+ case SpvOpFOrdNotEqual: return nir_op_fne;
+ case SpvOpFUnordNotEqual: return nir_op_fne;
+ case SpvOpULessThan: return nir_op_ult;
+ case SpvOpSLessThan: return nir_op_ilt;
+ case SpvOpFOrdLessThan: return nir_op_flt;
+ case SpvOpFUnordLessThan: return nir_op_flt;
+ case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
+ case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
+ case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
+ case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
+ case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
+ case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
+ case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
+ case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
+ case SpvOpUGreaterThanEqual: return nir_op_uge;
+ case SpvOpSGreaterThanEqual: return nir_op_ige;
+ case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
+ case SpvOpFUnordGreaterThanEqual: return nir_op_fge;
+
+ /* Conversions: */
+ case SpvOpConvertFToU: return nir_op_f2u;
+ case SpvOpConvertFToS: return nir_op_f2i;
+ case SpvOpConvertSToF: return nir_op_i2f;
+ case SpvOpConvertUToF: return nir_op_u2f;
+ case SpvOpBitcast: return nir_op_imov;
+ case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
+ case SpvOpUConvert: return nir_op_imov;
+ /* TODO: NIR is 32-bit only, so UConvert above and the conversions
+ * below are currently no-op moves.
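+ * Should NIR ever grow sized integer and float types, they will have
+ * to become real width conversions.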
*/ + case SpvOpSConvert: return nir_op_imov; + case SpvOpFConvert: return nir_op_fmov; + + /* Derivatives: */ + case SpvOpDPdx: return nir_op_fddx; + case SpvOpDPdy: return nir_op_fddy; + case SpvOpDPdxFine: return nir_op_fddx_fine; + case SpvOpDPdyFine: return nir_op_fddy_fine; + case SpvOpDPdxCoarse: return nir_op_fddx_coarse; + case SpvOpDPdyCoarse: return nir_op_fddy_coarse; + + default: + unreachable("No NIR equivalent"); + } +} + +static void +handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *_void) +{ + assert(dec->scope == VTN_DEC_DECORATION); + if (dec->decoration != SpvDecorationNoContraction) + return; + + b->nb.exact = true; +} + +void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + vtn_foreach_decoration(b, val, handle_no_contraction, NULL); + + /* Collect the various SSA sources */ + const unsigned num_inputs = count - 3; + struct vtn_ssa_value *vtn_src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) + vtn_src[i] = vtn_ssa_value(b, w[i + 3]); + + if (glsl_type_is_matrix(vtn_src[0]->type) || + (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { + vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); + b->nb.exact = false; + return; + } + + val->ssa = vtn_create_ssa_value(b, type); + nir_ssa_def *src[4] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) { + assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); + src[i] = vtn_src[i]->def; + } + + switch (opcode) { + case SpvOpAny: + if (src[0]->num_components == 1) { + val->ssa->def = nir_imov(&b->nb, src[0]); + } else { + nir_op op; + switch (src[0]->num_components) { + case 2: op = nir_op_bany_inequal2; break; + case 3: op = nir_op_bany_inequal3; break; + case 4: op = nir_op_bany_inequal4; break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_FALSE), + NULL, NULL); + } + break; + + case SpvOpAll: + if (src[0]->num_components == 1) { + val->ssa->def = nir_imov(&b->nb, src[0]); + } else { + nir_op op; + switch (src[0]->num_components) { + case 2: op = nir_op_ball_iequal2; break; + case 3: op = nir_op_ball_iequal3; break; + case 4: op = nir_op_ball_iequal4; break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], + nir_imm_int(&b->nb, NIR_TRUE), + NULL, NULL); + } + break; + + case SpvOpOuterProduct: { + for (unsigned i = 0; i < src[1]->num_components; i++) { + val->ssa->elems[i]->def = + nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); + } + break; + } + + case SpvOpDot: + val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); + break; + + case SpvOpIAddCarry: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); + break; + + case SpvOpISubBorrow: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); + break; + + case SpvOpUMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); + break; + + case SpvOpSMulExtended: + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = 
nir_imul(&b->nb, src[0], src[1]); + val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); + break; + + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); + break; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); + break; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); + break; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. */ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + break; + + case SpvOpIsNan: + val->ssa->def = nir_fne(&b->nb, src[0], src[0]); + break; + + case SpvOpIsInf: + val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), + nir_imm_float(&b->nb, INFINITY)); + break; + + default: { + bool swap; + nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); + break; + } /* default */ + } + + b->nb.exact = false; +} diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c new file mode 100644 index 00000000000..6a43ef8b2dd --- /dev/null +++ b/src/compiler/spirv/vtn_cfg.c @@ -0,0 +1,778 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vtn_private.h" +#include "nir/nir_vla.h" + +static bool +vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + list_inithead(&b->func->body); + b->func->control = w[3]; + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + val->func = b->func; + + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + func->num_params = glsl_get_length(func_type); + func->params = ralloc_array(b->shader, nir_parameter, func->num_params); + for (unsigned i = 0; i < func->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + func->params[i].type = param->type; + if (param->in) { + if (param->out) { + func->params[i].param_type = nir_parameter_inout; + } else { + func->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + func->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + + func->return_type = glsl_get_function_return_type(func_type); + + b->func->impl = nir_function_impl_create(func); + + b->func_param_idx = 0; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + + assert(b->func_param_idx < b->func->impl->num_params); + nir_variable *param = b->func->impl->params[b->func_param_idx++]; + + assert(param->type == type->type); + + /* Name the parameter so it shows up nicely in NIR */ + param->name = ralloc_strdup(param, val->name); + + struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); + vtn_var->type = type; + vtn_var->var = param; + vtn_var->chain.var = vtn_var; + vtn_var->chain.length = 0; + + struct vtn_type *without_array = type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; + + if (glsl_type_is_image(without_array->type)) { + vtn_var->mode = vtn_variable_mode_image; + param->interface_type = without_array->type; + } else if (glsl_type_is_sampler(without_array->type)) { + vtn_var->mode = vtn_variable_mode_sampler; + param->interface_type = without_array->type; + } else { + vtn_var->mode = vtn_variable_mode_param; + } + + val->access_chain = &vtn_var->chain; + break; + } + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->node.type = vtn_cf_node_type_block; + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
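+ *
+ * Conversely, a function that is declared but never defined has no
+ * blocks at all, never sets start_block, and is simply skipped by that
+ * later walk.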
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge == NULL); + b->block->merge = w; + break; + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block && b->block->branch == NULL); + b->block->branch = w; + b->block = NULL; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static void +vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, + struct vtn_block *break_block, + uint32_t block_id, uint32_t val, bool is_default) +{ + struct vtn_block *case_block = + vtn_value(b, block_id, vtn_value_type_block)->block; + + /* Don't create dummy cases that just break */ + if (case_block == break_block) + return; + + if (case_block->switch_case == NULL) { + struct vtn_case *c = ralloc(b, struct vtn_case); + + list_inithead(&c->body); + c->start_block = case_block; + c->fallthrough = NULL; + nir_array_init(&c->values, b); + c->is_default = false; + c->visited = false; + + list_addtail(&c->link, &swtch->cases); + + case_block->switch_case = c; + } + + if (is_default) { + case_block->switch_case->is_default = true; + } else { + nir_array_add(&case_block->switch_case->values, uint32_t, val); + } +} + +/* This function performs a depth-first search of the cases and puts them + * in fall-through order. + */ +static void +vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) +{ + if (cse->visited) + return; + + cse->visited = true; + + list_del(&cse->link); + + if (cse->fallthrough) { + vtn_order_case(swtch, cse->fallthrough); + + /* If we have a fall-through, place this case right before the case it + * falls through to. This ensures that fallthroughs come one after + * the other. These two can never get separated because that would + * imply something else falling through to the same case. Also, this + * can't break ordering because the DFS ensures that this case is + * visited before anything that falls through to it. 
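+ *
+ * For example, if case C falls through to A and A falls through to B,
+ * the resulting order is C, A, B regardless of the order in which the
+ * OpSwitch listed them.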
+ */ + list_addtail(&cse->link, &cse->fallthrough->link); + } else { + list_add(&cse->link, &swtch->cases); + } +} + +static enum vtn_branch_type +vtn_get_branch_type(struct vtn_block *block, + struct vtn_case *swcase, struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont) +{ + if (block->switch_case) { + /* This branch is actually a fallthrough */ + assert(swcase->fallthrough == NULL || + swcase->fallthrough == block->switch_case); + swcase->fallthrough = block->switch_case; + return vtn_branch_type_switch_fallthrough; + } else if (block == switch_break) { + return vtn_branch_type_switch_break; + } else if (block == loop_break) { + return vtn_branch_type_loop_break; + } else if (block == loop_cont) { + return vtn_branch_type_loop_continue; + } else { + return vtn_branch_type_none; + } +} + +static void +vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, + struct vtn_block *start, struct vtn_case *switch_case, + struct vtn_block *switch_break, + struct vtn_block *loop_break, struct vtn_block *loop_cont, + struct vtn_block *end) +{ + struct vtn_block *block = start; + while (block != end) { + if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && + !block->loop) { + struct vtn_loop *loop = ralloc(b, struct vtn_loop); + + loop->node.type = vtn_cf_node_type_loop; + list_inithead(&loop->body); + list_inithead(&loop->cont_body); + loop->control = block->merge[3]; + + list_addtail(&loop->node.link, cf_list); + block->loop = loop; + + struct vtn_block *new_loop_break = + vtn_value(b, block->merge[1], vtn_value_type_block)->block; + struct vtn_block *new_loop_cont = + vtn_value(b, block->merge[2], vtn_value_type_block)->block; + + /* Note: This recursive call will start with the current block as + * its start block. If we weren't careful, we would get here + * again and end up in infinite recursion. This is why we set + * block->loop above and check for it before creating one. This + * way, we only create the loop once and the second call that + * tries to handle this loop goes to the cases below and gets + * handled as a regular block. + * + * Note: When we make the recursive walk calls, we pass NULL for + * the switch break since you have to break out of the loop first. + * We do, however, still pass the current switch case because it's + * possible that the merge block for the loop is the start of + * another case. 
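+ *
+ * Concretely: the first call walks the loop body from `block` up to the
+ * merge block, while the second walks the continue construct from the
+ * continue target back to the loop header.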
+ */
+ vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL,
+ new_loop_break, new_loop_cont, NULL);
+ vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL,
+ new_loop_break, NULL, block);
+
+ block = new_loop_break;
+ continue;
+ }
+
+ assert(block->node.link.next == NULL);
+ list_addtail(&block->node.link, cf_list);
+
+ switch (*block->branch & SpvOpCodeMask) {
+ case SpvOpBranch: {
+ struct vtn_block *branch_block =
+ vtn_value(b, block->branch[1], vtn_value_type_block)->block;
+
+ block->branch_type = vtn_get_branch_type(branch_block,
+ switch_case, switch_break,
+ loop_break, loop_cont);
+
+ if (block->branch_type != vtn_branch_type_none)
+ return;
+
+ block = branch_block;
+ continue;
+ }
+
+ case SpvOpReturn:
+ case SpvOpReturnValue:
+ block->branch_type = vtn_branch_type_return;
+ return;
+
+ case SpvOpKill:
+ block->branch_type = vtn_branch_type_discard;
+ return;
+
+ case SpvOpBranchConditional: {
+ struct vtn_block *then_block =
+ vtn_value(b, block->branch[2], vtn_value_type_block)->block;
+ struct vtn_block *else_block =
+ vtn_value(b, block->branch[3], vtn_value_type_block)->block;
+
+ struct vtn_if *if_stmt = ralloc(b, struct vtn_if);
+
+ if_stmt->node.type = vtn_cf_node_type_if;
+ if_stmt->condition = block->branch[1];
+ list_inithead(&if_stmt->then_body);
+ list_inithead(&if_stmt->else_body);
+
+ list_addtail(&if_stmt->node.link, cf_list);
+
+ if (block->merge &&
+ (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) {
+ if_stmt->control = block->merge[2];
+ }
+
+ if_stmt->then_type = vtn_get_branch_type(then_block,
+ switch_case, switch_break,
+ loop_break, loop_cont);
+ if_stmt->else_type = vtn_get_branch_type(else_block,
+ switch_case, switch_break,
+ loop_break, loop_cont);
+
+ if (if_stmt->then_type == vtn_branch_type_none &&
+ if_stmt->else_type == vtn_branch_type_none) {
+ /* Neither side of the if is something we can short-circuit. */
+ assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
+ struct vtn_block *merge_block =
+ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
+
+ vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block,
+ switch_case, switch_break,
+ loop_break, loop_cont, merge_block);
+ vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block,
+ switch_case, switch_break,
+ loop_break, loop_cont, merge_block);
+
+ enum vtn_branch_type merge_type =
+ vtn_get_branch_type(merge_block, switch_case, switch_break,
+ loop_break, loop_cont);
+ if (merge_type == vtn_branch_type_none) {
+ block = merge_block;
+ continue;
+ } else {
+ return;
+ }
+ } else if (if_stmt->then_type != vtn_branch_type_none &&
+ if_stmt->else_type != vtn_branch_type_none) {
+ /* Both sides were short-circuited. We're done here. */
+ return;
+ } else {
+ /* Exactly one side of the branch could be short-circuited.
+ * We set the branch up as a predicated break/continue and we
+ * continue on with the other side as if it were what comes
+ * after the if.
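+ *
+ * For instance, if the then-side is a loop break, we record it in
+ * then_type and keep walking the else block as the fall-through path.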
+ */
+ if (if_stmt->then_type == vtn_branch_type_none) {
+ block = then_block;
+ } else {
+ block = else_block;
+ }
+ continue;
+ }
+ unreachable("Should have returned or continued");
+ }
+
+ case SpvOpSwitch: {
+ assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
+ struct vtn_block *break_block =
+ vtn_value(b, block->merge[1], vtn_value_type_block)->block;
+
+ struct vtn_switch *swtch = ralloc(b, struct vtn_switch);
+
+ swtch->node.type = vtn_cf_node_type_switch;
+ swtch->selector = block->branch[1];
+ list_inithead(&swtch->cases);
+
+ list_addtail(&swtch->node.link, cf_list);
+
+ /* First, we go through and record all of the cases. */
+ const uint32_t *branch_end =
+ block->branch + (block->branch[0] >> SpvWordCountShift);
+
+ vtn_add_case(b, swtch, break_block, block->branch[2], 0, true);
+ for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2)
+ vtn_add_case(b, swtch, break_block, w[1], w[0], false);
+
+ /* Now, we go through and walk the blocks. While we walk through
+ * the blocks, we also gather the much-needed fall-through
+ * information.
+ */
+ list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) {
+ assert(cse->start_block != break_block);
+ vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse,
+ break_block, NULL, loop_cont, NULL);
+ }
+
+ /* Finally, we walk over all of the cases one more time and put
+ * them in fall-through order.
+ */
+ for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) {
+ struct vtn_block *case_block =
+ vtn_value(b, *w, vtn_value_type_block)->block;
+
+ if (case_block == break_block)
+ continue;
+
+ assert(case_block->switch_case);
+
+ vtn_order_case(swtch, case_block->switch_case);
+ }
+
+ block = break_block;
+ continue;
+ }
+
+ case SpvOpUnreachable:
+ return;
+
+ default:
+ unreachable("Unhandled opcode");
+ }
+ }
+}
+
+void
+vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end)
+{
+ vtn_foreach_instruction(b, words, end,
+ vtn_cfg_handle_prepass_instruction);
+
+ foreach_list_typed(struct vtn_function, func, node, &b->functions) {
+ vtn_cfg_walk_blocks(b, &func->body, func->start_block,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+static bool
+vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ if (opcode == SpvOpLabel)
+ return true; /* Nothing to do */
+
+ /* If this isn't a phi node, stop. */
+ if (opcode != SpvOpPhi)
+ return false;
+
+ /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot.
+ * For each phi, we create a variable with the appropriate type and
+ * do a load from that variable. Then, in a second pass, we add
+ * stores to that variable to each of the predecessor blocks.
+ *
+ * We could do something more intelligent here. However, in order to
+ * handle loops and things properly, we really need dominance
+ * information. It would end up basically being the into-SSA
+ * algorithm all over again. It's easier if we just let
+ * lower_vars_to_ssa do that for us instead of repeating it here.
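+ *
+ * As a hypothetical example, "%p = OpPhi %int %a %blkA %b %blkB"
+ * becomes a local variable "phi" plus a load of it here; the second
+ * pass below then stores %a at the end of %blkA and %b at the end of
+ * %blkB.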
+ */ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_variable *phi_var = + nir_local_variable_create(b->nb.impl, type->type, "phi"); + _mesa_hash_table_insert(b->phi_table, w, phi_var); + + val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); + + return true; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode != SpvOpPhi) + return true; + + struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); + assert(phi_entry); + nir_variable *phi_var = phi_entry->data; + + for (unsigned i = 3; i < count; i += 2) { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); + struct vtn_block *pred = + vtn_value(b, w[i + 1], vtn_value_type_block)->block; + + b->nb.cursor = nir_after_block_before_jump(pred->end_block); + + vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); + } + + return true; +} + +static void +vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, + nir_variable *switch_fall_var, bool *has_switch_break) +{ + switch (branch_type) { + case vtn_branch_type_switch_break: + nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + *has_switch_break = true; + break; + case vtn_branch_type_switch_fallthrough: + break; /* Nothing to do */ + case vtn_branch_type_loop_break: + nir_jump(&b->nb, nir_jump_break); + break; + case vtn_branch_type_loop_continue: + nir_jump(&b->nb, nir_jump_continue); + break; + case vtn_branch_type_return: + nir_jump(&b->nb, nir_jump_return); + break; + case vtn_branch_type_discard: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + break; + } + default: + unreachable("Invalid branch type"); + } +} + +static void +vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, + nir_variable *switch_fall_var, bool *has_switch_break, + vtn_instruction_handler handler) +{ + list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { + switch (node->type) { + case vtn_cf_node_type_block: { + struct vtn_block *block = (struct vtn_block *)node; + + const uint32_t *block_start = block->label; + const uint32_t *block_end = block->merge ? 
block->merge :
+ block->branch;
+
+ block_start = vtn_foreach_instruction(b, block_start, block_end,
+ vtn_handle_phis_first_pass);
+
+ vtn_foreach_instruction(b, block_start, block_end, handler);
+
+ block->end_block = nir_cursor_current_block(b->nb.cursor);
+
+ if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
+ struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
+ vtn_local_store(b, src,
+ nir_deref_var_create(b, b->impl->return_var));
+ }
+
+ if (block->branch_type != vtn_branch_type_none) {
+ vtn_emit_branch(b, block->branch_type,
+ switch_fall_var, has_switch_break);
+ }
+
+ break;
+ }
+
+ case vtn_cf_node_type_if: {
+ struct vtn_if *vtn_if = (struct vtn_if *)node;
+
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition =
+ nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def);
+ nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
+
+ bool sw_break = false;
+
+ b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
+ if (vtn_if->then_type == vtn_branch_type_none) {
+ vtn_emit_cf_list(b, &vtn_if->then_body,
+ switch_fall_var, &sw_break, handler);
+ } else {
+ vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break);
+ }
+
+ b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
+ if (vtn_if->else_type == vtn_branch_type_none) {
+ vtn_emit_cf_list(b, &vtn_if->else_body,
+ switch_fall_var, &sw_break, handler);
+ } else {
+ vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break);
+ }
+
+ b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
+
+ /* If we encountered a switch break somewhere inside of the if,
+ * then it would have been handled correctly by calling
+ * emit_cf_list or emit_branch for the interior. However, we
+ * need to predicate everything following on whether or not we're
+ * still going.
+ */
+ if (sw_break) {
+ *has_switch_break = true;
+
+ nir_if *switch_if = nir_if_create(b->shader);
+ switch_if->condition =
+ nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var));
+ nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node);
+
+ b->nb.cursor = nir_after_cf_list(&switch_if->then_list);
+ }
+ break;
+ }
+
+ case vtn_cf_node_type_loop: {
+ struct vtn_loop *vtn_loop = (struct vtn_loop *)node;
+
+ nir_loop *loop = nir_loop_create(b->shader);
+ nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
+
+ b->nb.cursor = nir_after_cf_list(&loop->body);
+ vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler);
+
+ if (!list_empty(&vtn_loop->cont_body)) {
+ /* If we have a non-trivial continue body then we need to put
+ * it at the beginning of the loop with a flag to ensure that
+ * it doesn't get executed in the first iteration.
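+ *
+ * Roughly, the NIR we emit is shaped like:
+ *
+ *    cont = false;
+ *    loop {
+ *       if (cont) { <continue body> }
+ *       cont = true;
+ *       <loop body>
+ *    }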
+ */ + nir_variable *do_cont = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); + + b->nb.cursor = nir_before_cf_node(&loop->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); + + b->nb.cursor = nir_before_cf_list(&loop->body); + nir_if *cont_if = nir_if_create(b->shader); + cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); + nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); + + b->nb.cursor = nir_after_cf_list(&cont_if->then_list); + vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); + + b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); + nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); + + b->has_loop_continue = true; + } + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + break; + } + + case vtn_cf_node_type_switch: { + struct vtn_switch *vtn_switch = (struct vtn_switch *)node; + + /* First, we create a variable to keep track of whether or not the + * switch is still going at any given point. Any switch breaks + * will set this variable to false. + */ + nir_variable *fall_var = + nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); + + /* Next, we gather up all of the conditions. We have to do this + * up-front because we also need to build an "any" condition so + * that we can use !any for default. + */ + const int num_cases = list_length(&vtn_switch->cases); + NIR_VLA(nir_ssa_def *, conditions, num_cases); + + nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; + /* An accumulation of all conditions. Used for the default */ + nir_ssa_def *any = NULL; + + int i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + if (cse->is_default) { + conditions[i++] = NULL; + continue; + } + + nir_ssa_def *cond = NULL; + nir_array_foreach(&cse->values, uint32_t, val) { + nir_ssa_def *is_val = + nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); + + cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; + } + + any = any ? 
nir_ior(&b->nb, any, cond) : cond; + conditions[i++] = cond; + } + assert(i == num_cases); + + /* Now we can walk the list of cases and actually emit code */ + i = 0; + list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { + /* Figure out the condition */ + nir_ssa_def *cond = conditions[i++]; + if (cse->is_default) { + assert(cond == NULL); + cond = nir_inot(&b->nb, any); + } + /* Take fallthrough into account */ + cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); + + nir_if *case_if = nir_if_create(b->nb.shader); + case_if->condition = nir_src_for_ssa(cond); + nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); + + bool has_break = false; + b->nb.cursor = nir_after_cf_list(&case_if->then_list); + nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); + vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); + (void)has_break; /* We don't care */ + + b->nb.cursor = nir_after_cf_node(&case_if->cf_node); + } + assert(i == num_cases); + + break; + } + + default: + unreachable("Invalid CF node type"); + } + } +} + +void +vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, + vtn_instruction_handler instruction_handler) +{ + nir_builder_init(&b->nb, func->impl); + b->nb.cursor = nir_after_cf_list(&func->impl->body); + b->has_loop_continue = false; + b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); + + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + + /* Continue blocks for loops get inserted before the body of the loop + * but instructions in the continue may use SSA defs in the loop body. + * Therefore, we need to repair SSA to insert the needed phi nodes. + */ + if (b->has_loop_continue) + nir_repair_ssa_impl(func->impl); +} diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c new file mode 100644 index 00000000000..e05d28ffede --- /dev/null +++ b/src/compiler/spirv/vtn_glsl450.c @@ -0,0 +1,666 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "vtn_private.h"
+#include "GLSL.std.450.h"
+
+#define M_PIf ((float) M_PI)
+#define M_PI_2f ((float) M_PI_2)
+#define M_PI_4f ((float) M_PI_4)
+
+static nir_ssa_def *
+build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
+{
+ unsigned swiz[4] = {1, 0, 0, 0};
+ nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
+ return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
+}
+
+static nir_ssa_def *
+build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
+{
+ unsigned yzx[4] = {1, 2, 0, 0};
+ unsigned zxy[4] = {2, 0, 1, 0};
+
+ nir_ssa_def *prod0 =
+ nir_fmul(b, col[0],
+ nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
+ nir_swizzle(b, col[2], zxy, 3, true)));
+ nir_ssa_def *prod1 =
+ nir_fmul(b, col[0],
+ nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
+ nir_swizzle(b, col[2], yzx, 3, true)));
+
+ nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
+
+ return nir_fadd(b, nir_channel(b, diff, 0),
+ nir_fadd(b, nir_channel(b, diff, 1),
+ nir_channel(b, diff, 2)));
+}
+
+static nir_ssa_def *
+build_mat4_det(nir_builder *b, nir_ssa_def **col)
+{
+ nir_ssa_def *subdet[4];
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned swiz[3];
+ for (unsigned j = 0; j < 3; j++)
+ swiz[j] = j + (j >= i);
+
+ nir_ssa_def *subcol[3];
+ subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
+ subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
+ subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
+
+ subdet[i] = build_mat3_det(b, subcol);
+ }
+
+ nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
+
+ return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
+ nir_channel(b, prod, 1)),
+ nir_fsub(b, nir_channel(b, prod, 2),
+ nir_channel(b, prod, 3)));
+}
+
+static nir_ssa_def *
+build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
+{
+ unsigned size = glsl_get_vector_elements(src->type);
+
+ nir_ssa_def *cols[4];
+ for (unsigned i = 0; i < size; i++)
+ cols[i] = src->elems[i]->def;
+
+ switch (size) {
+ case 2: return build_mat2_det(&b->nb, cols);
+ case 3: return build_mat3_det(&b->nb, cols);
+ case 4: return build_mat4_det(&b->nb, cols);
+ default:
+ unreachable("Invalid matrix size");
+ }
+}
+
+/* Computes the determinant of the submatrix given by taking src and
+ * removing the specified row and column.
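+ *
+ * For example, with size == 3, row == 1, col == 0 the row swizzle is
+ * {0, 2} and the 2x2 minor is built from columns 1 and 2, which is
+ * exactly the cofactor that matrix_inverse() below needs.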
+ */ +static nir_ssa_def * +build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, + unsigned size, unsigned row, unsigned col) +{ + assert(row < size && col < size); + if (size == 2) { + return nir_channel(b, src->elems[1 - col]->def, 1 - row); + } else { + /* Swizzle to get all but the specified row */ + unsigned swiz[3]; + for (unsigned j = 0; j < 3; j++) + swiz[j] = j + (j >= row); + + /* Grab all but the specified column */ + nir_ssa_def *subcol[3]; + for (unsigned j = 0; j < size; j++) { + if (j != col) { + subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, + swiz, size - 1, true); + } + } + + if (size == 3) { + return build_mat2_det(b, subcol); + } else { + assert(size == 4); + return build_mat3_det(b, subcol); + } + } +} + +static struct vtn_ssa_value * +matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + nir_ssa_def *adj_col[4]; + unsigned size = glsl_get_vector_elements(src->type); + + /* Build up an adjugate matrix */ + for (unsigned c = 0; c < size; c++) { + nir_ssa_def *elem[4]; + for (unsigned r = 0; r < size; r++) { + elem[r] = build_mat_subdet(&b->nb, src, size, c, r); + + if ((r + c) % 2) + elem[r] = nir_fneg(&b->nb, elem[r]); + } + + adj_col[c] = nir_vec(&b->nb, elem, size); + } + + nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); + + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); + for (unsigned i = 0; i < size; i++) + val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); + + return val; +} + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static inline nir_ssa_def * +build_fclamp(nir_builder *b, + nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) +{ + return nir_fmin(b, nir_fmax(b, x, min_val), max_val); +} + +/** + * Return e^x. + */ +static nir_ssa_def * +build_exp(nir_builder *b, nir_ssa_def *x) +{ + return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); +} + +/** + * Return ln(x) - the natural logarithm of x. + */ +static nir_ssa_def * +build_log(nir_builder *b, nir_ssa_def *x) +{ + return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); +} + +/** + * Approximate asin(x) by the formula: + * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1)))) + * + * which is correct to first order at x=0 and x=±1 regardless of the p + * coefficients but can be made second-order correct at both ends by selecting + * the fit coefficients appropriately. Different p coefficients can be used + * in the asin and acos implementation to minimize some relative error metric + * in each case. + */ +static nir_ssa_def * +build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1) +{ + nir_ssa_def *abs_x = nir_fabs(b, x); + return nir_fmul(b, nir_fsign(b, x), + nir_fsub(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), + nir_fadd(b, nir_imm_float(b, M_PI_2f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), + nir_fmul(b, abs_x, + nir_fadd(b, nir_imm_float(b, p0), + nir_fmul(b, abs_x, + nir_imm_float(b, p1)))))))))); +} + +/** + * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 
+ */
+static nir_ssa_def *
+build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
+{
+ nir_ssa_def *accum = xs[0];
+
+ for (int i = 1; i < terms; i++)
+ accum = nir_fadd(b, accum, xs[i]);
+
+ return accum;
+}
+
+static nir_ssa_def *
+build_atan(nir_builder *b, nir_ssa_def *y_over_x)
+{
+ nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
+ nir_ssa_def *one = nir_imm_float(b, 1.0f);
+
+ /*
+ * range-reduction, first step:
+ *
+ *      / y_over_x         if |y_over_x| <= 1.0;
+ * x = <
+ *      \ 1.0 / y_over_x   otherwise
+ */
+ nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
+ nir_fmax(b, abs_y_over_x, one));
+
+ /*
+ * approximate atan by evaluating polynomial:
+ *
+ * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
+ * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
+ * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
+ */
+ nir_ssa_def *x_2 = nir_fmul(b, x, x);
+ nir_ssa_def *x_3 = nir_fmul(b, x_2, x);
+ nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2);
+ nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2);
+ nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
+ nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
+
+ nir_ssa_def *polynomial_terms[] = {
+ nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
+ nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
+ nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
+ nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
+ nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
+ nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
+ };
+
+ nir_ssa_def *tmp =
+ build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
+
+ /* range-reduction fixup */
+ tmp = nir_fadd(b, tmp,
+ nir_fmul(b,
+ nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
+ nir_fadd(b, nir_fmul(b, tmp,
+ nir_imm_float(b, -2.0f)),
+ nir_imm_float(b, M_PI_2f))));
+
+ /* sign fixup */
+ return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
+}
+
+static nir_ssa_def *
+build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
+{
+ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
+
+ /* If |x| >= 1.0e-8 * |y|: */
+ nir_ssa_def *condition =
+ nir_fge(b, nir_fabs(b, x),
+ nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
+
+ /* Then...call atan(y/x) and fix it up: */
+ nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
+ nir_ssa_def *r_then =
+ nir_bcsel(b, nir_flt(b, x, zero),
+ nir_fadd(b, atan1,
+ nir_bcsel(b, nir_fge(b, y, zero),
+ nir_imm_float(b, M_PIf),
+ nir_imm_float(b, -M_PIf))),
+ atan1);
+
+ /* Else... */
+ nir_ssa_def *r_else =
+ nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
+
+ return nir_bcsel(b, condition, r_then, r_else);
+}
+
+static nir_ssa_def *
+build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
+{
+ nir_ssa_def *abs_x = nir_fabs(b, x);
+ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
+
+ /* Single-precision floating-point values are stored as
+ * 1 sign bit;
+ * 8 exponent bits;
+ * 23 mantissa bits.
+ *
+ * An exponent shift of 23 will shift the mantissa out, leaving only the
+ * exponent and sign bit (which itself may be zero, if the absolute value
+ * was taken before the bitcast and shift).
+ */
+ nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
+ nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
+
+ nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
+
+ /* Exponent of floating-point values in the range [0.5, 1.0).
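+ * (0x3f000000 is the IEEE-754 bit pattern of 0.5f: sign 0, biased
+ * exponent 126, zero mantissa. OR-ing it into the masked sign and
+ * mantissa bits rescales the magnitude of x into [0.5, 1.0).)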
*/ + nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); + + nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); + + *exponent = + nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), + nir_bcsel(b, is_not_zero, exponent_bias, zero)); + + return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), + nir_bcsel(b, is_not_zero, exponent_value, zero)); +} + +static nir_op +vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode) +{ + switch (opcode) { + case GLSLstd450Round: return nir_op_fround_even; + case GLSLstd450RoundEven: return nir_op_fround_even; + case GLSLstd450Trunc: return nir_op_ftrunc; + case GLSLstd450FAbs: return nir_op_fabs; + case GLSLstd450SAbs: return nir_op_iabs; + case GLSLstd450FSign: return nir_op_fsign; + case GLSLstd450SSign: return nir_op_isign; + case GLSLstd450Floor: return nir_op_ffloor; + case GLSLstd450Ceil: return nir_op_fceil; + case GLSLstd450Fract: return nir_op_ffract; + case GLSLstd450Sin: return nir_op_fsin; + case GLSLstd450Cos: return nir_op_fcos; + case GLSLstd450Pow: return nir_op_fpow; + case GLSLstd450Exp2: return nir_op_fexp2; + case GLSLstd450Log2: return nir_op_flog2; + case GLSLstd450Sqrt: return nir_op_fsqrt; + case GLSLstd450InverseSqrt: return nir_op_frsq; + case GLSLstd450FMin: return nir_op_fmin; + case GLSLstd450UMin: return nir_op_umin; + case GLSLstd450SMin: return nir_op_imin; + case GLSLstd450FMax: return nir_op_fmax; + case GLSLstd450UMax: return nir_op_umax; + case GLSLstd450SMax: return nir_op_imax; + case GLSLstd450FMix: return nir_op_flrp; + case GLSLstd450Fma: return nir_op_ffma; + case GLSLstd450Ldexp: return nir_op_ldexp; + case GLSLstd450FindILsb: return nir_op_find_lsb; + case GLSLstd450FindSMsb: return nir_op_ifind_msb; + case GLSLstd450FindUMsb: return nir_op_ufind_msb; + + /* Packing/Unpacking functions */ + case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8; + case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8; + case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16; + case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16; + case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16; + case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8; + case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; + case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; + case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; + case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; + + default: + unreachable("No NIR equivalent"); + } +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, + const uint32_t *w, unsigned count) +{ + struct nir_builder *nb = &b->nb; + const struct glsl_type *dest_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, dest_type); + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3] = { NULL, }; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5])->def; + + switch (entrypoint) { + case GLSLstd450Radians: + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); + return; + case GLSLstd450Degrees: + val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); + return; + case GLSLstd450Tan: + val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), + nir_fcos(nb, src[0])); + return; + + case GLSLstd450Modf: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + 
val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), + nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); + return; + } + + case GLSLstd450ModfStruct: { + nir_ssa_def *sign = nir_fsign(nb, src[0]); + nir_ssa_def *abs = nir_fabs(nb, src[0]); + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); + val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); + return; + } + + case GLSLstd450Step: + val->ssa->def = nir_sge(nb, src[1], src[0]); + return; + + case GLSLstd450Length: + val->ssa->def = build_length(nb, src[0]); + return; + case GLSLstd450Distance: + val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); + return; + case GLSLstd450Normalize: + val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); + return; + + case GLSLstd450Exp: + val->ssa->def = build_exp(nb, src[0]); + return; + + case GLSLstd450Log: + val->ssa->def = build_log(nb, src[0]); + return; + + case GLSLstd450FClamp: + val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); + return; + case GLSLstd450UClamp: + val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); + return; + case GLSLstd450SClamp: + val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); + return; + + case GLSLstd450Cross: { + unsigned yzx[4] = { 1, 2, 0, 0 }; + unsigned zxy[4] = { 2, 0, 1, 0 }; + val->ssa->def = + nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), + nir_swizzle(nb, src[1], zxy, 3, true)), + nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), + nir_swizzle(nb, src[1], yzx, 3, true))); + return; + } + + case GLSLstd450SmoothStep: { + /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ + nir_ssa_def *t = + build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), + nir_fsub(nb, src[1], src[0])), + nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); + /* result = t * t * (3 - 2 * t) */ + val->ssa->def = + nir_fmul(nb, t, nir_fmul(nb, t, + nir_fsub(nb, nir_imm_float(nb, 3.0), + nir_fmul(nb, nir_imm_float(nb, 2.0), t)))); + return; + } + + case GLSLstd450FaceForward: + val->ssa->def = + nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), + nir_imm_float(nb, 0.0)), + src[0], nir_fneg(nb, src[0])); + return; + + case GLSLstd450Reflect: + /* I - 2 * dot(N, I) * N */ + val->ssa->def = + nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), + nir_fmul(nb, nir_fdot(nb, src[0], src[1]), + src[1]))); + return; + + case GLSLstd450Refract: { + nir_ssa_def *I = src[0]; + nir_ssa_def *N = src[1]; + nir_ssa_def *eta = src[2]; + nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); + nir_ssa_def *one = nir_imm_float(nb, 1.0); + nir_ssa_def *zero = nir_imm_float(nb, 0.0); + /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ + nir_ssa_def *k = + nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, + nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); + nir_ssa_def *result = + nir_fsub(nb, nir_fmul(nb, eta, I), + nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), + nir_fsqrt(nb, k)), N)); + /* XXX: bcsel, or if statement? 
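+ * bcsel evaluates both operands, but that is harmless here: when k < 0
+ * (total internal reflection) the sqrt produces garbage that the select
+ * then discards in favor of zero.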
*/ + val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); + return; + } + + case GLSLstd450Sinh: + /* 0.5 * (e^x - e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Cosh: + /* 0.5 * (e^x + e^(-x)) */ + val->ssa->def = + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))); + return; + + case GLSLstd450Tanh: + /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ + val->ssa->def = + nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fsub(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0])))), + nir_fmul(nb, nir_imm_float(nb, 0.5f), + nir_fadd(nb, build_exp(nb, src[0]), + build_exp(nb, nir_fneg(nb, src[0]))))); + return; + + case GLSLstd450Asinh: + val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), + build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), + nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f)))))); + return; + case GLSLstd450Acosh: + val->ssa->def = build_log(nb, nir_fadd(nb, src[0], + nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), + nir_imm_float(nb, 1.0f))))); + return; + case GLSLstd450Atanh: { + nir_ssa_def *one = nir_imm_float(nb, 1.0); + val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f), + build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), + nir_fsub(nb, one, src[0])))); + return; + } + + case GLSLstd450Asin: + val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955); + return; + + case GLSLstd450Acos: + val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f), + build_asin(nb, src[0], 0.08132463, -0.02363318)); + return; + + case GLSLstd450Atan: + val->ssa->def = build_atan(nb, src[0]); + return; + + case GLSLstd450Atan2: + val->ssa->def = build_atan2(nb, src[0], src[1]); + return; + + case GLSLstd450Frexp: { + nir_ssa_def *exponent; + val->ssa->def = build_frexp(nb, src[0], &exponent); + nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); + return; + } + + case GLSLstd450FrexpStruct: { + assert(glsl_type_is_struct(val->ssa->type)); + val->ssa->elems[0]->def = build_frexp(nb, src[0], + &val->ssa->elems[1]->def); + return; + } + + default: + val->ssa->def = + nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint), + src[0], src[1], src[2], NULL); + return; + } +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *w, unsigned count) +{ + switch ((enum GLSLstd450)ext_opcode) { + case GLSLstd450Determinant: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450MatrixInverse: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); + break; + } + + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); + } + + return true; +} diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h new file mode 100644 index 00000000000..3840d8c4b65 --- /dev/null +++ b/src/compiler/spirv/vtn_private.h @@ -0,0 +1,484 @@ +/* + * Copyright © 2015 Intel Corporation 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "nir/nir_array.h" +#include "nir_spirv.h" +#include "spirv.h" + +struct vtn_builder; +struct vtn_decoration; + +enum vtn_value_type { + vtn_value_type_invalid = 0, + vtn_value_type_undef, + vtn_value_type_string, + vtn_value_type_decoration_group, + vtn_value_type_type, + vtn_value_type_constant, + vtn_value_type_access_chain, + vtn_value_type_function, + vtn_value_type_block, + vtn_value_type_ssa, + vtn_value_type_extension, + vtn_value_type_image_pointer, + vtn_value_type_sampled_image, +}; + +enum vtn_branch_type { + vtn_branch_type_none, + vtn_branch_type_switch_break, + vtn_branch_type_switch_fallthrough, + vtn_branch_type_loop_break, + vtn_branch_type_loop_continue, + vtn_branch_type_discard, + vtn_branch_type_return, +}; + +enum vtn_cf_node_type { + vtn_cf_node_type_block, + vtn_cf_node_type_if, + vtn_cf_node_type_loop, + vtn_cf_node_type_switch, +}; + +struct vtn_cf_node { + struct list_head link; + enum vtn_cf_node_type type; +}; + +struct vtn_loop { + struct vtn_cf_node node; + + /* The main body of the loop */ + struct list_head body; + + /* The "continue" part of the loop. This gets executed after the body + * and is where you go when you hit a continue. 
+ */
+ struct list_head cont_body;
+
+ SpvLoopControlMask control;
+};
+
+struct vtn_if {
+ struct vtn_cf_node node;
+
+ uint32_t condition;
+
+ enum vtn_branch_type then_type;
+ struct list_head then_body;
+
+ enum vtn_branch_type else_type;
+ struct list_head else_body;
+
+ SpvSelectionControlMask control;
+};
+
+struct vtn_case {
+ struct list_head link;
+
+ struct list_head body;
+
+ /* The block that starts this case */
+ struct vtn_block *start_block;
+
+ /* The fallthrough case, if any */
+ struct vtn_case *fallthrough;
+
+ /* The uint32_t values that map to this case */
+ nir_array values;
+
+ /* True if this is the default case */
+ bool is_default;
+
+ /* Initialized to false; used when sorting the list of cases */
+ bool visited;
+};
+
+struct vtn_switch {
+ struct vtn_cf_node node;
+
+ uint32_t selector;
+
+ struct list_head cases;
+};
+
+struct vtn_block {
+ struct vtn_cf_node node;
+
+ /** A pointer to the label instruction */
+ const uint32_t *label;
+
+ /** A pointer to the merge instruction (or NULL if none exists) */
+ const uint32_t *merge;
+
+ /** A pointer to the branch instruction that ends this block */
+ const uint32_t *branch;
+
+ enum vtn_branch_type branch_type;
+
+ /** Points to the loop that this block starts (if it starts a loop) */
+ struct vtn_loop *loop;
+
+ /** Points to the switch case started by this block (if any) */
+ struct vtn_case *switch_case;
+
+ /** The last NIR block emitted while handling this SPIR-V block. */
+ nir_block *end_block;
+};
+
+struct vtn_function {
+ struct exec_node node;
+
+ nir_function_impl *impl;
+ struct vtn_block *start_block;
+
+ struct list_head body;
+
+ const uint32_t *end;
+
+ SpvFunctionControlMask control;
+};
+
+typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
+ const uint32_t *, unsigned);
+
+void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words,
+ const uint32_t *end);
+void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
+ vtn_instruction_handler instruction_handler);
+
+const uint32_t *
+vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
+ const uint32_t *end, vtn_instruction_handler handler);
+
+struct vtn_ssa_value {
+ union {
+ nir_ssa_def *def;
+ struct vtn_ssa_value **elems;
+ };
+
+ /* For matrices, if this is non-NULL, then this value is actually the
+ * transpose of some other value. The value that `transposed` points to
+ * always dominates this value.
+ */
+ struct vtn_ssa_value *transposed;
+
+ const struct glsl_type *type;
+};
+
+struct vtn_type {
+ const struct glsl_type *type;
+
+ /* The value that declares this type. Used for finding decorations */
+ struct vtn_value *val;
+
+ /* for matrices, whether the matrix is stored row-major */
+ bool row_major;
+
+ /* for structs, the offset of each member */
+ unsigned *offsets;
+
+ /* for structs, whether it was decorated as a "non-SSBO-like" block */
+ bool block;
+
+ /* for structs, whether it was decorated as an "SSBO-like" block */
+ bool buffer_block;
+
+ /* for structs with block == true, whether this is a builtin block (i.e. a
+ * block that contains only builtins).
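+ * gl_PerVertex is the canonical example of such a block.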
+ */ + bool builtin_block; + + /* Image format for image_load_store type images */ + unsigned image_format; + + /* Access qualifier for storage images */ + SpvAccessQualifier access_qualifier; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + +struct vtn_variable; + +enum vtn_access_mode { + vtn_access_mode_id, + vtn_access_mode_literal, +}; + +struct vtn_access_link { + enum vtn_access_mode mode; + uint32_t id; +}; + +struct vtn_access_chain { + struct vtn_variable *var; + + uint32_t length; + + /* Struct elements and array offsets */ + struct vtn_access_link link[0]; +}; + +enum vtn_variable_mode { + vtn_variable_mode_local, + vtn_variable_mode_global, + vtn_variable_mode_param, + vtn_variable_mode_ubo, + vtn_variable_mode_ssbo, + vtn_variable_mode_push_constant, + vtn_variable_mode_image, + vtn_variable_mode_sampler, + vtn_variable_mode_workgroup, + vtn_variable_mode_input, + vtn_variable_mode_output, +}; + +struct vtn_variable { + enum vtn_variable_mode mode; + + struct vtn_type *type; + + unsigned descriptor_set; + unsigned binding; + + nir_variable *var; + nir_variable **members; + + struct vtn_access_chain chain; +}; + +struct vtn_image_pointer { + struct vtn_access_chain *image; + nir_ssa_def *coord; + nir_ssa_def *sample; +}; + +struct vtn_sampled_image { + struct vtn_access_chain *image; /* Image or array of images */ + struct vtn_access_chain *sampler; /* Sampler */ +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + struct vtn_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; + struct vtn_access_chain *access_chain; + struct vtn_image_pointer *image; + struct vtn_sampled_image *sampled_image; + struct vtn_function *func; + struct vtn_block *block; + struct vtn_ssa_value *ssa; + vtn_instruction_handler ext_handler; + }; +}; + +#define VTN_DEC_DECORATION -1 +#define VTN_DEC_EXECUTION_MODE -2 +#define VTN_DEC_STRUCT_MEMBER0 0 + +struct vtn_decoration { + struct vtn_decoration *next; + + /* Specifies how to apply this decoration. Negative values represent a + * decoration or execution mode. (See the VTN_DEC_ #defines above.) + * Non-negative values specify that it applies to a structure member. + */ + int scope; + + const uint32_t *literals; + struct vtn_value *group; + + union { + SpvDecoration decoration; + SpvExecutionMode exec_mode; + }; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + + /* Current file, line, and column. Useful for debugging. Set + * automatically by vtn_foreach_instruction. + */ + char *file; + int line, col; + + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + + /* + * Map from phi instructions (pointer to the start of the instruction) + * to the variable corresponding to it. 
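+ * (Phis are lowered through temporary variables: each OpPhi gets a NIR variable, predecessor blocks store their value into it, and the phi itself becomes a load. This table remembers which variable belongs to which OpPhi.)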
+ */ + struct hash_table *phi_table; + + unsigned num_specializations; + struct nir_spirv_specialization *specializations; + + unsigned value_id_bound; + struct vtn_value *values; + + gl_shader_stage entry_point_stage; + const char *entry_point_name; + struct vtn_value *entry_point; + bool origin_upper_left; + + struct vtn_function *func; + struct exec_list functions; + + /* Current function parameter index */ + unsigned func_param_idx; + + bool has_loop_continue; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, + const struct glsl_type *type); + +struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, + struct vtn_ssa_value *src); + +nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, + unsigned index); +nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index); +nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, unsigned index); +nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index); + +nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); + +nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, + struct vtn_access_chain *chain); +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix); + +struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); + +void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest); + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); + +void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest); + +void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + int member, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); + +typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + const struct vtn_decoration *, + void *); + +void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, + vtn_execution_mode_foreach_cb cb, void *data); + +nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); + +void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count); + +bool vtn_handle_glsl450_instruction(struct 
vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c new file mode 100644 index 00000000000..3cbac1e5da8 --- /dev/null +++ b/src/compiler/spirv/vtn_variables.c @@ -0,0 +1,1415 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "vtn_private.h" + +static struct vtn_access_chain * +vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, + unsigned new_ids) +{ + struct vtn_access_chain *chain; + + unsigned new_len = old->length + new_ids; + chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); + + chain->var = old->var; + chain->length = new_len; + + for (unsigned i = 0; i < old->length; i++) + chain->link[i] = old->link[i]; + + return chain; +} + +static nir_ssa_def * +vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, + unsigned stride) +{ + assert(stride > 0); + if (link.mode == vtn_access_mode_literal) { + return nir_imm_int(&b->nb, link.id * stride); + } else if (stride == 1) { + return vtn_ssa_value(b, link.id)->def; + } else { + return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, + nir_imm_int(&b->nb, stride)); + } +} + +static struct vtn_type * +vtn_access_chain_tail_type(struct vtn_builder *b, + struct vtn_access_chain *chain) +{ + struct vtn_type *type = chain->var->type; + for (unsigned i = 0; i < chain->length; i++) { + if (glsl_type_is_struct(type->type)) { + assert(chain->link[i].mode == vtn_access_mode_literal); + type = type->members[chain->link[i].id]; + } else { + type = type->array_element; + } + } + return type; +} + +/* Crawls a chain of array derefs and rewrites the types so that the + * lengths stay the same but the terminal type is the one given by + * tail_type. This is useful for split structures. 
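+ * (Illustrative example: when a geometry-shader input "S in[3]" has had a vec4 member split into its own "vec4 m[3]" variable, the per-vertex array deref built against S is retyped here so the chain reads vec4[3] -> vec4; only the terminal type changes, the array lengths stay the same.)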
+ */ +static void +rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) +{ + deref->type = type; + if (deref->child) { + assert(deref->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_array(deref->type)); + rewrite_deref_types(deref->child, glsl_get_array_element(type)); + } +} + +nir_deref_var * +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) +{ + nir_deref_var *deref_var; + if (chain->var->var) { + deref_var = nir_deref_var_create(b, chain->var->var); + } else { + assert(chain->var->members); + /* Create the deref_var manually. It will get filled out later. */ + deref_var = rzalloc(b, nir_deref_var); + deref_var->deref.deref_type = nir_deref_type_var; + } + + struct vtn_type *deref_type = chain->var->type; + nir_deref *tail = &deref_var->deref; + nir_variable **members = chain->var->members; + + for (unsigned i = 0; i < chain->length; i++) { + enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + deref_type = deref_type->array_element; + + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref.type = deref_type->type; + + if (chain->link[i].mode == vtn_access_mode_literal) { + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = chain->link[i].id; + } else { + assert(chain->link[i].mode == vtn_access_mode_id); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); + } + tail->child = &deref_arr->deref; + tail = tail->child; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(chain->link[i].mode == vtn_access_mode_literal); + unsigned idx = chain->link[i].id; + deref_type = deref_type->members[idx]; + if (members) { + /* This is a pre-split structure. */ + deref_var->var = members[idx]; + rewrite_deref_types(&deref_var->deref, members[idx]->type); + assert(tail->type == deref_type->type); + members = NULL; + } else { + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + tail = tail->child; + } + break; + } + default: + unreachable("Invalid type for deref"); + } + } + + assert(members == NULL); + return deref_var; +} + +static void +_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, + nir_deref *tail, struct vtn_ssa_value *inout) +{ + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = tail->child; + + if (glsl_type_is_vector_or_scalar(tail->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + tail->child = NULL; + + nir_intrinsic_op op = load ? 
nir_intrinsic_load_var : + nir_intrinsic_store_var; + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); + intrin->num_components = glsl_get_vector_elements(tail->type); + + if (load) { + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + intrin->num_components, + glsl_get_bit_size(glsl_get_base_type(tail->type)), + NULL); + inout->def = &intrin->dest.ssa; + } else { + nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1); + intrin->src[0] = nir_src_for_ssa(inout->def); + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); + } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + unsigned elems = glsl_get_length(tail->type); + nir_deref_array *deref_arr = nir_deref_array_create(b); + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->deref.type = glsl_get_array_element(tail->type); + tail->child = &deref_arr->deref; + for (unsigned i = 0; i < elems; i++) { + deref_arr->base_offset = i; + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); + } + } else { + assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(tail->type); + nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); + tail->child = &deref_struct->deref; + for (unsigned i = 0; i < elems; i++) { + deref_struct->index = i; + deref_struct->deref.type = glsl_get_struct_field(tail->type, i); + _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); + } + } + + tail->child = old_child; +} + +nir_deref_var * +vtn_nir_deref(struct vtn_builder *b, uint32_t id) +{ + struct vtn_access_chain *chain = + vtn_value(b, id, vtn_value_type_access_chain)->access_chain; + + return vtn_access_chain_to_deref(b, chain); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. 
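+ * (For instance, for an access chain that ends by selecting component 2 of a vec4, the deref returned here is the one whose type is the vec4, and its remaining child is the array deref that picks off element 2.)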
+ */ +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +struct vtn_ssa_value * +vtn_local_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); + _vtn_local_load_store(b, true, src, src_tail, val); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +void +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + + if (dest_tail->child) { + struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); + _vtn_local_load_store(b, true, dest, dest_tail, val); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_local_load_store(b, false, dest, dest_tail, val); + } else { + _vtn_local_load_store(b, false, dest, dest_tail, src); + } +} + +static nir_ssa_def * +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, + struct vtn_type **type, unsigned *chain_idx) +{ + /* Push constants have no explicit binding */ + if (chain->var->mode == vtn_variable_mode_push_constant) { + *chain_idx = 0; + *type = chain->var->type; + return NULL; + } + + nir_ssa_def *array_index; + if (glsl_type_is_array(chain->var->type->type)) { + assert(chain->length > 0); + array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); + *chain_idx = 1; + *type = chain->var->type->array_element; + } else { + array_index = nir_imm_int(&b->nb, 0); + *chain_idx = 0; + *type = chain->var->type; + } + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set); + nir_intrinsic_set_binding(instr, chain->var->binding); + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + + return &instr->dest.ssa; +} + +nir_ssa_def * +vtn_access_chain_to_offset(struct vtn_builder *b, + struct vtn_access_chain *chain, + nir_ssa_def **index_out, struct vtn_type **type_out, + unsigned *end_idx_out, bool stop_at_matrix) +{ + unsigned idx = 0; + struct vtn_type *type; + *index_out = get_vulkan_resource_index(b, chain, &type, &idx); + + nir_ssa_def *offset = nir_imm_int(&b->nb, 0); + for (; idx < chain->length; idx++) { + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + /* Some users may not want matrix or vector derefs */ + if (stop_at_matrix) + goto end; + /* Fall through */ + + case GLSL_TYPE_ARRAY: + offset = nir_iadd(&b->nb, offset, + 
vtn_access_link_as_ssa(b, chain->link[idx], + type->stride)); + + type = type->array_element; + break; + + case GLSL_TYPE_STRUCT: { + assert(chain->link[idx].mode == vtn_access_mode_literal); + unsigned member = chain->link[idx].id; + offset = nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, type->offsets[member])); + type = type->members[member]; + break; + } + + default: + unreachable("Invalid type for deref"); + } + } + +end: + *type_out = type; + if (end_idx_out) + *end_idx_out = idx; + + return offset; +} + +static void +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_ssa_value **inout, const struct glsl_type *type) +{ + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); + instr->num_components = glsl_get_vector_elements(type); + + int src = 0; + if (!load) { + nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1); + instr->src[src++] = nir_src_for_ssa((*inout)->def); + } + + /* We set the base and size for push constant load to the entire push + * constant block for now. + */ + if (op == nir_intrinsic_load_push_constant) { + nir_intrinsic_set_base(instr, 0); + nir_intrinsic_set_range(instr, 128); + } + + if (index) + instr->src[src++] = nir_src_for_ssa(index); + + instr->src[src++] = nir_src_for_ssa(offset); + + if (load) { + nir_ssa_dest_init(&instr->instr, &instr->dest, + instr->num_components, + glsl_get_bit_size(glsl_get_base_type(type)), NULL); + (*inout)->def = &instr->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &instr->instr); + + if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) + (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); +} + +static void +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, + nir_ssa_def *index, nir_ssa_def *offset, + struct vtn_access_chain *chain, unsigned chain_idx, + struct vtn_type *type, struct vtn_ssa_value **inout) +{ + if (chain && chain_idx >= chain->length) + chain = NULL; + + if (load && chain == NULL && *inout == NULL) + *inout = vtn_create_ssa_value(b, type->type); + + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* This is where things get interesting. At this point, we've hit + * a vector, a scalar, or a matrix. + */ + if (glsl_type_is_matrix(type->type)) { + if (chain == NULL) { + /* Loading the whole matrix */ + struct vtn_ssa_value *transpose; + unsigned num_ops, vec_width; + if (type->row_major) { + num_ops = glsl_get_vector_elements(type->type); + vec_width = glsl_get_matrix_columns(type->type); + if (load) { + const struct glsl_type *transpose_type = + glsl_matrix_type(base_type, vec_width, num_ops); + *inout = vtn_create_ssa_value(b, transpose_type); + } else { + transpose = vtn_ssa_transpose(b, *inout); + inout = &transpose; + } + } else { + num_ops = glsl_get_matrix_columns(type->type); + vec_width = glsl_get_vector_elements(type->type); + } + + for (unsigned i = 0; i < num_ops; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + _vtn_load_store_tail(b, op, load, index, elem_offset, + &(*inout)->elems[i], + glsl_vector_type(base_type, vec_width)); + } + + if (load && type->row_major) + *inout = vtn_ssa_transpose(b, *inout); + } else if (type->row_major) { + /* Row-major but with an access chain.
*/ + nir_ssa_def *col_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], + type->array_element->stride); + offset = nir_iadd(&b->nb, offset, col_offset); + + if (chain_idx + 1 < chain->length) { + /* Picking off a single element */ + nir_ssa_def *row_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], + type->stride); + offset = nir_iadd(&b->nb, offset, row_offset); + if (load) + *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, + glsl_scalar_type(base_type)); + } else { + /* Grabbing a column; picking one element off each row */ + unsigned num_comps = glsl_get_vector_elements(type->type); + const struct glsl_type *column_type = + glsl_get_column_type(type->type); + + nir_ssa_def *comps[4]; + for (unsigned i = 0; i < num_comps; i++) { + nir_ssa_def *elem_offset = + nir_iadd(&b->nb, offset, + nir_imm_int(&b->nb, i * type->stride)); + + struct vtn_ssa_value *comp, temp_val; + if (!load) { + temp_val.def = nir_channel(&b->nb, (*inout)->def, i); + temp_val.type = glsl_scalar_type(base_type); + } + comp = &temp_val; + _vtn_load_store_tail(b, op, load, index, elem_offset, + &comp, glsl_scalar_type(base_type)); + comps[i] = comp->def; + } + + if (load) { + if (*inout == NULL) + *inout = vtn_create_ssa_value(b, column_type); + + (*inout)->def = nir_vec(&b->nb, comps, num_comps); + } + } + } else { + /* Column-major with a deref. Fall through to array case. */ + nir_ssa_def *col_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); + offset = nir_iadd(&b->nb, offset, col_offset); + + _vtn_block_load_store(b, op, load, index, offset, + chain, chain_idx + 1, + type->array_element, inout); + } + } else if (chain == NULL) { + /* Single whole vector */ + assert(glsl_type_is_vector_or_scalar(type->type)); + _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); + } else { + /* Single component of a vector. Fall through to array case. 
*/ + nir_ssa_def *elem_offset = + vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); + offset = nir_iadd(&b->nb, offset, elem_offset); + + _vtn_block_load_store(b, op, load, index, offset, NULL, 0, + type->array_element, inout); + } + return; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->array_element, &(*inout)->elems[i]); + } + return; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(type->type); + for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *elem_off = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, + type->members[i], &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid block member type"); + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + nir_intrinsic_op op; + switch (src->var->mode) { + case vtn_variable_mode_ubo: + op = nir_intrinsic_load_ubo; + break; + case vtn_variable_mode_ssbo: + op = nir_intrinsic_load_ssbo; + break; + case vtn_variable_mode_push_constant: + op = nir_intrinsic_load_push_constant; + break; + default: + assert(!"Invalid block variable mode"); + } + + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); + + struct vtn_ssa_value *value = NULL; + _vtn_block_load_store(b, op, true, index, offset, + src, chain_idx, type, &value); + return value; +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dst) +{ + nir_ssa_def *offset, *index = NULL; + struct vtn_type *type; + unsigned chain_idx; + offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); + + _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, + dst, chain_idx, type, &src); +} + +static bool +vtn_variable_is_external_block(struct vtn_variable *var) +{ + return var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_push_constant; +} + +static void +_vtn_variable_load_store(struct vtn_builder *b, bool load, + struct vtn_access_chain *chain, + struct vtn_type *tail_type, + struct vtn_ssa_value **inout) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are stored row-major in a UBO.
+ */ + if (load) { + *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); + } else { + vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); + } + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_chain = + vtn_access_chain_extend(b, chain, 1); + new_chain->link[chain->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + if (load) { + assert(*inout == NULL); + *inout = rzalloc(b, struct vtn_ssa_value); + (*inout)->type = tail_type->type; + (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); + } + for (unsigned i = 0; i < elems; i++) { + new_chain->link[chain->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? + tail_type->array_element : tail_type->members[i]; + _vtn_variable_load_store(b, load, new_chain, elem_type, + &(*inout)->elems[i]); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + +struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) +{ + if (vtn_variable_is_external_block(src->var)) { + return vtn_block_load(b, src); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + struct vtn_ssa_value *val = NULL; + _vtn_variable_load_store(b, true, src, tail_type, &val); + return val; + } +} + +void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_access_chain *dest) +{ + if (vtn_variable_is_external_block(dest->var)) { + assert(dest->var->mode == vtn_variable_mode_ssbo); + vtn_block_store(b, src, dest); + } else { + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); + _vtn_variable_load_store(b, false, dest, tail_type, &src); + } +} + +static void +_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src, struct vtn_type *tail_type) +{ + enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + /* At this point, we have a scalar, vector, or matrix so we know that + * there cannot be any structure splitting still in the way. By + * stopping at the matrix level rather than the vector level, we + * ensure that matrices get loaded in the optimal way even if they + * are stored row-major in a UBO. + */ + vtn_variable_store(b, vtn_variable_load(b, src), dest); + return; + + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: { + struct vtn_access_chain *new_src, *new_dest; + new_src = vtn_access_chain_extend(b, src, 1); + new_dest = vtn_access_chain_extend(b, dest, 1); + new_src->link[src->length].mode = vtn_access_mode_literal; + new_dest->link[dest->length].mode = vtn_access_mode_literal; + unsigned elems = glsl_get_length(tail_type->type); + for (unsigned i = 0; i < elems; i++) { + new_src->link[src->length].id = i; + new_dest->link[dest->length].id = i; + struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
+ tail_type->array_element : tail_type->members[i]; + _vtn_variable_copy(b, new_dest, new_src, elem_type); + } + return; + } + + default: + unreachable("Invalid access chain type"); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, + struct vtn_access_chain *src) +{ + struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); + assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); + + /* TODO: At some point, we should add a special-case for when we can + * just emit a copy_var intrinsic. + */ + _vtn_variable_copy(b, dest, src, tail_type); +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + break; + case SpvBuiltInVertexIndex: + *location = SYSTEM_VALUE_VERTEX_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceIndex: + *location = SYSTEM_VALUE_INSTANCE_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInViewportIndex: + *location = VARYING_SLOT_VIEWPORT; + if (b->shader->stage == MESA_SHADER_GEOMETRY) + *mode = nir_var_shader_out; + else if (b->shader->stage == MESA_SHADER_FRAGMENT) + *mode = nir_var_shader_in; + else + unreachable("invalid stage for SpvBuiltInViewportIndex"); + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ + set_mode_system_value(mode); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + *location = SYSTEM_VALUE_NUM_WORK_GROUPS; + set_mode_system_value(mode); + break; + case SpvBuiltInWorkgroupSize: + /* This should already be handled */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationIndex: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; + set_mode_system_value(mode); + break; + case SpvBuiltInGlobalInvocationId: + *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + struct vtn_variable *vtn_var = void_var; + + /* Handle decorations that apply to a vtn_variable as a whole */ + switch (dec->decoration) { + case SpvDecorationBinding: + vtn_var->binding = dec->literals[0]; + return; + case SpvDecorationDescriptorSet: + vtn_var->descriptor_set = dec->literals[0]; + return; + + case SpvDecorationLocation: { + unsigned location = dec->literals[0]; + bool is_vertex_input; + if (b->shader->stage == MESA_SHADER_FRAGMENT && + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + vtn_var->mode == vtn_variable_mode_input) { + is_vertex_input = true; + location += VERT_ATTRIB_GENERIC0; + } else if (vtn_var->mode == vtn_variable_mode_input || + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += VARYING_SLOT_VAR0; + } else { + assert(!"Location must be on input or output variable"); + } + + if (vtn_var->var) { + vtn_var->var->data.location = location; + vtn_var->var->data.explicit_location = true; + } else { + assert(vtn_var->members); + unsigned length = glsl_get_length(vtn_var->type->type); + for (unsigned i = 0; i < length; i++) { + vtn_var->members[i]->data.location = location; + vtn_var->members[i]->data.explicit_location = true; + location += + glsl_count_attribute_slots(vtn_var->members[i]->interface_type, + is_vertex_input); + } + } + return; + } + + default: + break; + } + + /* Now we handle decorations that apply to a particular nir_variable */ + nir_variable *nir_var = vtn_var->var; + if (val->value_type == vtn_value_type_access_chain) { + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); + assert(member == -1); + } else { + assert(val->value_type == vtn_value_type_type); + if (member != -1) + nir_var = vtn_var->members[member]; + } + + if (nir_var == NULL) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + nir_var->data.centroid = true; + break; + case SpvDecorationSample: + nir_var->data.sample = true; + break; + case SpvDecorationInvariant: + nir_var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(nir_var->constant_initializer != NULL); + nir_var->data.read_only = true; + break; + case SpvDecorationNonWritable: + nir_var->data.read_only = true; + break; + case SpvDecorationComponent: + nir_var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + nir_var->data.explicit_index = true; + nir_var->data.index = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + if (builtin == SpvBuiltInWorkgroupSize) { + /* This shouldn't be a builtin. It's actually a constant. */ + nir_var->data.mode = nir_var_global; + nir_var->data.read_only = true; + + nir_constant *c = rzalloc(nir_var, nir_constant); + c->value.u[0] = b->shader->info.cs.local_size[0]; + c->value.u[1] = b->shader->info.cs.local_size[1]; + c->value.u[2] = b->shader->info.cs.local_size[2]; + nir_var->constant_initializer = c; + break; + } + + nir_variable_mode mode = nir_var->data.mode; + vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); + nir_var->data.explicit_location = true; + nir_var->data.mode = mode; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + nir_var->data.origin_upper_left = b->origin_upper_left; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. */ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_variable *var = rzalloc(b, struct vtn_variable); + var->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + var->chain.var = var; + var->chain.length = 0; + + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = &var->chain; + + struct vtn_type *without_array = var->type; + while(glsl_type_is_array(without_array->type)) + without_array = without_array->array_element; + + nir_variable_mode nir_mode; + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (without_array->block) { + var->mode = vtn_variable_mode_ubo; + b->shader->info.num_ubos++; + } else if (without_array->buffer_block) { + var->mode = vtn_variable_mode_ssbo; + b->shader->info.num_ssbos++; + } else if (glsl_type_is_image(without_array->type)) { + var->mode = vtn_variable_mode_image; + nir_mode = nir_var_uniform; + b->shader->info.num_images++; + } else if (glsl_type_is_sampler(without_array->type)) { + var->mode = vtn_variable_mode_sampler; + nir_mode = nir_var_uniform; + b->shader->info.num_textures++; + } else { + assert(!"Invalid uniform variable type"); + } + break; + case SpvStorageClassPushConstant: + var->mode = vtn_variable_mode_push_constant; + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; + break; + case SpvStorageClassInput: + var->mode = vtn_variable_mode_input; + nir_mode = nir_var_shader_in; + break; + case SpvStorageClassOutput: + var->mode = vtn_variable_mode_output; + nir_mode = nir_var_shader_out; + break; + case SpvStorageClassPrivate: + var->mode = vtn_variable_mode_global; + nir_mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->mode = vtn_variable_mode_local; + nir_mode = nir_var_local; + break; + case SpvStorageClassWorkgroup: + var->mode = vtn_variable_mode_workgroup; + nir_mode = nir_var_shared; + break; + case SpvStorageClassCrossWorkgroup: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + switch (var->mode) { + case vtn_variable_mode_local: + case vtn_variable_mode_global: + case vtn_variable_mode_image: + case vtn_variable_mode_sampler: + case vtn_variable_mode_workgroup: + /* For these, we create the variable normally */ + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->data.mode = nir_mode; + + switch (var->mode) { + case 
vtn_variable_mode_image: + case vtn_variable_mode_sampler: + var->var->interface_type = without_array->type; + break; + default: + var->var->interface_type = NULL; + break; + } + break; + + case vtn_variable_mode_input: + case vtn_variable_mode_output: { + /* For inputs and outputs, we immediately split structures. This + * is for a couple of reasons. For one, builtins may all come in + * a struct and we really want those split out into separate + * variables. For another, interpolation qualifiers can be + * applied to members of the top-level struct and we need to be + * able to preserve that information. + */ + + int array_length = -1; + struct vtn_type *interface_type = var->type; + if (b->shader->stage == MESA_SHADER_GEOMETRY && + glsl_type_is_array(var->type->type)) { + /* In Geometry shaders (and some tessellation), inputs come + * in per-vertex arrays. However, some builtins come in + * non-per-vertex, hence the need for the is_array check. In + * any case, there are no non-builtin arrays allowed so this + * check should be sufficient. + */ + interface_type = var->type->array_element; + array_length = glsl_get_length(var->type->type); + } + + if (glsl_type_is_struct(interface_type->type)) { + /* It's a struct. Split it. */ + unsigned num_members = glsl_get_length(interface_type->type); + var->members = ralloc_array(b, nir_variable *, num_members); + + for (unsigned i = 0; i < num_members; i++) { + const struct glsl_type *mtype = interface_type->members[i]->type; + if (array_length >= 0) + mtype = glsl_array_type(mtype, array_length); + + var->members[i] = rzalloc(b->shader, nir_variable); + var->members[i]->name = + ralloc_asprintf(var->members[i], "%s.%d", val->name, i); + var->members[i]->type = mtype; + var->members[i]->interface_type = + interface_type->members[i]->type; + var->members[i]->data.mode = nir_mode; + } + } else { + var->var = rzalloc(b->shader, nir_variable); + var->var->name = ralloc_strdup(var->var, val->name); + var->var->type = var->type->type; + var->var->interface_type = interface_type->type; + var->var->data.mode = nir_mode; + } + + /* For inputs and outputs, we need to grab locations and builtin + * information from the interface type. + */ + vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); + break; + + case vtn_variable_mode_param: + unreachable("Not created through OpVariable"); + } + + case vtn_variable_mode_ubo: + case vtn_variable_mode_ssbo: + case vtn_variable_mode_push_constant: + /* These don't need actual variables. */ + break; + } + + if (count > 4) { + assert(count == 5); + nir_constant *constant = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + var->var->constant_initializer = + nir_constant_clone(constant, var->var); + } + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (var->mode == vtn_variable_mode_image || + var->mode == vtn_variable_mode_sampler) { + /* XXX: We still need the binding information in the nir_variable + * for these. We should fix that.
+ */ + var->var->data.binding = var->binding; + var->var->data.descriptor_set = var->descriptor_set; + + if (var->mode == vtn_variable_mode_image) + var->var->data.image.format = without_array->image_format; + } + + if (var->mode == vtn_variable_mode_local) { + assert(var->members == NULL && var->var != NULL); + nir_function_impl_add_variable(b->impl, var->var); + } else if (var->var) { + nir_shader_add_variable(b->shader, var->var); + } else if (var->members) { + unsigned count = glsl_get_length(without_array->type); + for (unsigned i = 0; i < count; i++) { + assert(var->members[i]->data.mode != nir_var_local); + nir_shader_add_variable(b->shader, var->members[i]); + } + } else { + assert(var->mode == vtn_variable_mode_ubo || + var->mode == vtn_variable_mode_ssbo || + var->mode == vtn_variable_mode_push_constant); + } + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_access_chain *base, *chain; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. + */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_access_chain); + base = base_val->access_chain; + } + + chain = vtn_access_chain_extend(b, base, count - 4); + + unsigned idx = base->length; + for (int i = 4; i < count; i++) { + struct vtn_value *link_val = vtn_untyped_value(b, w[i]); + if (link_val->value_type == vtn_value_type_constant) { + chain->link[idx].mode = vtn_access_mode_literal; + chain->link[idx].id = link_val->constant->value.u[0]; + } else { + chain->link[idx].mode = vtn_access_mode_id; + chain->link[idx].id = w[i]; + } + idx++; + } + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = chain; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_access_chain); + val->access_chain = chain; + } + break; + } + + case SpvOpCopyMemory: { + struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); + struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); + + vtn_variable_copy(b, dest->access_chain, src->access_chain); + break; + } + + case SpvOpLoad: { + struct vtn_access_chain *src = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + if (src->var->mode == vtn_variable_mode_image || + src->var->mode == vtn_variable_mode_sampler) { + vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src); + break; + } + + case SpvOpStore: { + struct vtn_access_chain *dest = + vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); + break; + } + + case SpvOpArrayLength: { + struct vtn_access_chain *chain = + vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; + + 
const uint32_t offset = chain->var->type->offsets[w[4]]; + const uint32_t stride = chain->var->type->members[w[4]]->stride; + + unsigned chain_idx; + struct vtn_type *type; + nir_ssa_def *index = + get_vulkan_resource_index(b, chain, &type, &chain_idx); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_get_buffer_size); + instr->src[0] = nir_src_for_ssa(index); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); + nir_builder_instr_insert(&b->nb, &instr->instr); + nir_ssa_def *buf_size = &instr->dest.ssa; + + /* array_length = max(buffer_size - offset, 0) / stride */ + nir_ssa_def *array_length = + nir_idiv(&b->nb, + nir_imax(&b->nb, + nir_isub(&b->nb, + buf_size, + nir_imm_int(&b->nb, offset)), + nir_imm_int(&b->nb, 0u)), + nir_imm_int(&b->nb, stride)); + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); + val->ssa->def = array_length; + break; + } + + case SpvOpCopyMemorySized: + default: + unreachable("Unhandled opcode"); + } +} diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 52748a0619a..90732dba961 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -31,7 +31,7 @@ #include "anv_private.h" #include "brw_nir.h" #include "anv_nir.h" -#include "nir/spirv/nir_spirv.h" +#include "spirv/nir_spirv.h" /* Needed for SWIZZLE macros */ #include "program/prog_instruction.h" -- cgit v1.2.3 From e61c812f76eda0cf70317ee8c4070e80e3312e67 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 14 Apr 2016 15:12:41 -0700 Subject: anv/pipeline: Use the right mask for lower_indirect_derefs --- src/intel/vulkan/anv_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 90732dba961..a215a377a96 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -174,11 +174,11 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_shader_gather_info(nir, entry_point->impl); - uint32_t indirect_mask = 0; + nir_variable_mode indirect_mask = 0; if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) - indirect_mask |= (1 << nir_var_shader_in); + indirect_mask |= nir_var_shader_in; if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= 1 << nir_var_local; + indirect_mask |= nir_var_local; nir_lower_indirect_derefs(nir, indirect_mask); -- cgit v1.2.3 From e40b867145160dfb258a6f03c7e6b02f3f839aa4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 10:30:42 -0700 Subject: anv/intel_icd: Don't provide an absolute path The driver will be installed to $(libdir)/libvulkan_intel.so and just providing a driver name is enough for the loader. This also ensures that multi-arch systems work ok. 
--- src/intel/vulkan/.gitignore | 3 +-- src/intel/vulkan/intel_icd.json | 7 +++++++ src/intel/vulkan/intel_icd.json.in | 7 ------- 3 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 src/intel/vulkan/intel_icd.json delete mode 100644 src/intel/vulkan/intel_icd.json.in (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore index 40afc2e3989..4a683b45487 100644 --- a/src/intel/vulkan/.gitignore +++ b/src/intel/vulkan/.gitignore @@ -5,5 +5,4 @@ /wayland-drm-protocol.c /wayland-drm-client-protocol.h /dev_icd.json -/intel_icd.json -/gen*_pack.h \ No newline at end of file +/gen*_pack.h diff --git a/src/intel/vulkan/intel_icd.json b/src/intel/vulkan/intel_icd.json new file mode 100644 index 00000000000..277c14ec444 --- /dev/null +++ b/src/intel/vulkan/intel_icd.json @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/intel_icd.json.in b/src/intel/vulkan/intel_icd.json.in deleted file mode 100644 index d9b363a9762..00000000000 --- a/src/intel/vulkan/intel_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@install_libdir@/libvulkan_intel.so", - "abi_versions": "1.0.3" - } -} -- cgit v1.2.3 From 48cc8c284aa28405eaa2335bb8f96324c8153ca7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 30 Mar 2016 10:34:58 -0700 Subject: anv: Install the installable ICD --- src/intel/vulkan/Makefile.am | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/intel/vulkan') diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index acf84e55871..cba66713948 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -207,3 +207,6 @@ libvulkan_test_la_CFLAGS = \ libvulkan_test_la_LIBADD = $(libvulkan_intel_la_LIBADD) include $(top_srcdir)/install-lib-links.mk + +install-data-local: + $(INSTALL_DATA) -D $(srcdir)/intel_icd.json $(VULKAN_ICD_INSTALL_DIR)/intel_icd.json -- cgit v1.2.3