diff options
author | Kristian Høgsberg <[email protected]> | 2015-05-08 22:32:37 -0700 |
---|---|---|
committer | Kristian Høgsberg <[email protected]> | 2015-05-09 11:38:32 -0700 |
commit | 769785c497aaa60c629e0299e3ebfff53a8e393e (patch) | |
tree | 0388971be6de7a75e2bfe0ac0fa3cc7d1bd7679d /src/vulkan | |
parent | d6fb155f30ac2bd7853da32ddf99e9f7840a8f01 (diff) |
Add vulkan driver for BDW
Diffstat (limited to 'src/vulkan')
-rw-r--r-- | src/vulkan/Makefile.am | 67 | ||||
-rw-r--r-- | src/vulkan/allocator.c | 499 | ||||
-rw-r--r-- | src/vulkan/aub.c | 292 | ||||
-rw-r--r-- | src/vulkan/aub.h | 153 | ||||
-rw-r--r-- | src/vulkan/compiler.cpp | 931 | ||||
-rw-r--r-- | src/vulkan/device.c | 2634 | ||||
-rw-r--r-- | src/vulkan/gem.c | 283 | ||||
-rw-r--r-- | src/vulkan/gen8_pack.h | 8702 | ||||
-rw-r--r-- | src/vulkan/image.c | 404 | ||||
-rw-r--r-- | src/vulkan/intel.c | 93 | ||||
-rw-r--r-- | src/vulkan/meta.c | 140 | ||||
-rw-r--r-- | src/vulkan/pipeline.c | 565 | ||||
-rw-r--r-- | src/vulkan/private.h | 594 | ||||
-rw-r--r-- | src/vulkan/util.c | 99 | ||||
-rw-r--r-- | src/vulkan/vk.c | 723 |
15 files changed, 16179 insertions, 0 deletions
diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am new file mode 100644 index 00000000000..b131ac13897 --- /dev/null +++ b/src/vulkan/Makefile.am @@ -0,0 +1,67 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +lib_LTLIBRARIES = libvulkan.la + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include + +libvulkan_la_CFLAGS = \ + -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g \ + -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init + +libvulkan_la_CXXFLAGS = \ + -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g + +libvulkan_la_SOURCES = \ + private.h \ + gem.c \ + device.c \ + aub.c \ + allocator.c \ + util.c \ + pipeline.c \ + image.c \ + meta.c \ + intel.c \ + compiler.cpp + +bin_PROGRAMS = vk + +vk_SOURCES = vk.c +vk_LDADD = libvulkan.la -lpng16 + +libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/allocator.c b/src/vulkan/allocator.c new file mode 100644 index 00000000000..67abaa45464 --- /dev/null +++ b/src/vulkan/allocator.c @@ -0,0 +1,499 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <values.h> +#include <assert.h> +#include <linux/futex.h> +#include <linux/memfd.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "private.h" + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It sholdn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these object and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similar to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty. */ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) +{ + union anv_free_list current, next, old; + + current = *list; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. + */ + __sync_synchronize(); + + next.offset = *(uint32_t *)(*map + current.offset); + next.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, next.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) +{ + union anv_free_list current, old, new; + uint32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + *next_ptr = current.offset; + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +static int +anv_block_pool_grow(struct anv_block_pool *pool); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->size = 0; + pool->block_size = block_size; + pool->next_block = 0; + pool->free_list = ANV_FREE_LIST_EMPTY; + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + /* Immediately grow the pool so we'll have a backing bo. */ + anv_block_pool_grow(pool); +} + +/* The memfd path lets us create a map for an fd and lets us grow and remap + * without copying. It breaks valgrind however, so we have a MAP_ANONYMOUS + * path we can take for valgrind debugging. */ + +#define USE_MEMFD 0 + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + +#if USE_MEMFD + close(pool->fd); +#endif +} + +static int +anv_block_pool_grow(struct anv_block_pool *pool) +{ + size_t size; + void *map; + int gem_handle; + struct anv_mmap_cleanup *cleanup; + + if (pool->size == 0) { + size = 32 * pool->block_size; + } else { + size = pool->size * 2; + } + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + return -1; + *cleanup = ANV_MMAP_CLEANUP_INIT; + +#if USE_MEMFD + if (pool->size == 0) + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + + if (pool->fd == -1) + return -1; + + if (ftruncate(pool->fd, size) == -1) + return -1; + + /* First try to see if mremap can grow the map in place. */ + map = MAP_FAILED; + if (pool->size > 0) + map = mremap(pool->map, pool->size, size, 0); + if (map == MAP_FAILED) { + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, 0); + cleanup->map = map; + cleanup->size = size; + } + if (map == MAP_FAILED) + return -1; +#else + /* The MAP_ANONYMOUS fallback can't grow without races, so just bail here + * if we're trying to grow the pool. */ + assert(pool->size == 0); + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); + if (map == MAP_FAILED) + return -1; + cleanup->map = map; + cleanup->size = size; +#endif + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + return -1; + cleanup->gem_handle = gem_handle; + + /* Now that we successfull allocated everything, we can write the new + * values back into pool. */ + pool->map = map; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + + /* Write size last and after the memory barrier here. We need the memory + * barrier to make sure map and gem_handle are written before other threads + * see the new size. A thread could allocate a block and then go try using + * the old pool->map and access out of bounds. */ + + __sync_synchronize(); + pool->size = size; + + return 0; +} + +uint32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + uint32_t offset, block, size; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) + return offset; + + restart: + size = pool->size; + block = __sync_fetch_and_add(&pool->next_block, pool->block_size); + if (block < size) { + return block; + } else if (block == size) { + /* We allocated the first block outside the pool, we have to grow it. + * pool->next_block acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. */ + anv_block_pool_grow(pool); + futex_wake(&pool->size, INT_MAX); + } else { + futex_wait(&pool->size, size); + __sync_fetch_and_add(&pool->next_block, -pool->block_size); + goto restart; + } + + return block; +} + +void +anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset) +{ + anv_free_list_push(&pool->free_list, pool->map, offset); +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + uint32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) + return offset; + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + new.next = anv_block_pool_alloc(block_pool); + new.end = new.next + block_pool->block_size; + old.u64 = __sync_fetch_and_add(&pool->block.u64, new.u64 - block.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return new.next; + } else { + futex_wait(&pool->block.end, block.end); + __sync_fetch_and_add(&pool->block.u64, -pool->state_size); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct stream_block { + uint32_t next; +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. We use it for allocating dynamic state. + */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->next = 0; + stream->end = 0; + stream->current_block = NULL_BLOCK; +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + struct stream_block *sb; + uint32_t block, next_block; + + block = stream->current_block; + while (block != 1) { + sb = stream->block_pool->map + block; + next_block = sb->next; + anv_block_pool_free(stream->block_pool, block); + block = next_block; + } +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct stream_block *sb; + struct anv_state state; + uint32_t block; + + state.offset = ALIGN_U32(stream->next, alignment); + if (state.offset + size > stream->end) { + block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; + sb->next = stream->current_block; + stream->current_block = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + state.offset = ALIGN_U32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + stream->next = state.offset + size; + + state.alloc_size = size; + state.map = stream->block_pool->map + state.offset; + + return state; +} diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c new file mode 100644 index 00000000000..5e66aa839e3 --- /dev/null +++ b/src/vulkan/aub.c @@ -0,0 +1,292 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/mman.h> + +#include <drm.h> +#include <i915_drm.h> + +#include "private.h" +#include "aub.h" + +struct anv_aub_writer { + FILE *file; + uint32_t offset; + int gen; +}; + +static void +aub_out(struct anv_aub_writer *writer, uint32_t data) +{ + fwrite(&data, 1, 4, writer->file); +} + +static void +aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) +{ + fwrite(data, 1, size, writer->file); +} + +static struct anv_aub_writer * +get_anv_aub_writer(struct anv_device *device) +{ + struct anv_aub_writer *writer = device->aub_writer; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + const char *filename; + + if (geteuid() != getuid()) + return NULL; + + if (writer) + return writer; + + writer = malloc(sizeof(*writer)); + if (writer == NULL) + return NULL; + + filename = "intel.aub"; + writer->gen = device->info.gen; + writer->file = fopen(filename, "w+"); + if (!writer->file) { + free(writer); + return NULL; + } + + /* Start allocating objects from just after the GTT. */ + writer->offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(writer, CMD_AUB_HEADER | (13 - 2)); + aub_out(writer, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(writer, 0); /* app name */ + } + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* comment len */ + + /* Set up the GTT. The max we can handle is 256M */ + aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT_ENTRY | + AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, 0); /* subtype */ + aub_out(writer, 0); /* offset */ + aub_out(writer, gtt_size); /* size */ + if (writer->gen >= 8) + aub_out(writer, 0); + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(writer, entry); + } + + return device->aub_writer = writer; +} + +void +anv_aub_writer_destroy(struct anv_aub_writer *writer) +{ + fclose(writer->file); + free(writer); +} + + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, + void *virtual, uint32_t size, uint32_t gtt_offset) +{ + uint32_t block_size; + uint32_t offset; + uint32_t subtype = 0; + static const char null_block[8 * 4096]; + + for (offset = 0; offset < size; offset += block_size) { + block_size = size - offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | + type | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, subtype); + aub_out(writer, gtt_offset + offset); + aub_out(writer, ALIGN_U32(block_size, 4)); + if (writer->gen >= 8) + aub_out(writer, 0); + + if (virtual) + aub_out_data(writer, (char *) virtual + offset, block_size); + else + aub_out_data(writer, null_block, block_size); + + /* Pad to a multiple of 4 bytes. */ + aub_out_data(writer, null_block, -block_size & 3); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(struct anv_aub_writer *writer, + uint32_t batch_offset, uint32_t offset, + int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + else if (ring_flag == I915_EXEC_BLT) + ring = AUB_TRACE_TYPE_RING_PRB2; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + if (writer->gen >= 8) { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); + ringbuffer[ring_count++] = batch_offset; + ringbuffer[ring_count++] = 0; + } else { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_offset; + } + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(writer, 0); /* general/surface subtype */ + aub_out(writer, offset); + aub_out(writer, ring_count * 4); + if (writer->gen >= 8) + aub_out(writer, 0); + + /* FIXME: Need some flush operations here? */ + aub_out_data(writer, ringbuffer, ring_count * 4); +} + +struct aub_bo { + uint32_t offset; + void *map; + void *relocated; +}; + +static void +relocate_bo(struct anv_bo *bo, struct anv_reloc_list *list, struct aub_bo *bos) +{ + struct aub_bo *aub_bo = &bos[bo->index]; + struct drm_i915_gem_relocation_entry *reloc; + uint32_t *dw; + + aub_bo->relocated = malloc(bo->size); + memcpy(aub_bo->relocated, aub_bo->map, bo->size); + for (size_t i = 0; i < list->num_relocs; i++) { + reloc = &list->relocs[i]; + assert(reloc->offset < bo->size); + dw = aub_bo->relocated + reloc->offset; + *dw = bos[reloc->target_handle].offset + reloc->delta; + } +} + +void +anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_aub_writer *writer; + struct anv_bo *bo; + uint32_t ring_flag = 0; + uint32_t offset, length; + struct aub_bo *aub_bos; + + writer = get_anv_aub_writer(device); + if (writer == NULL) + return; + + aub_bos = malloc(cmd_buffer->bo_count * sizeof(aub_bos[0])); + offset = writer->offset; + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (bo->map) + aub_bos[i].map = bo->map; + else + aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); + aub_bos[i].relocated = aub_bos[i].map; + aub_bos[i].offset = offset; + offset = ALIGN_U32(offset + bo->size + 4095, 4096); + } + + relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos); + relocate_bo(&device->surface_state_block_pool.bo, + &batch->surf_relocs, aub_bos); + + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { + bo = cmd_buffer->exec2_bos[i]; + if (i == cmd_buffer->bo_count - 1) { + length = batch->next - batch->bo.map; + aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, + aub_bos[i].relocated, + length, aub_bos[i].offset); + } else { + aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, + aub_bos[i].relocated, + bo->size, aub_bos[i].offset); + } + if (aub_bos[i].relocated != aub_bos[i].map) + free(aub_bos[i].relocated); + if (aub_bos[i].map != bo->map) + anv_gem_munmap(aub_bos[i].map, bo->size); + } + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(writer, aub_bos[batch->bo.index].offset, + offset, ring_flag); + + free(aub_bos); + + fflush(writer->file); +} diff --git a/src/vulkan/aub.h b/src/vulkan/aub.h new file mode 100644 index 00000000000..7a67712ff9c --- /dev/null +++ b/src/vulkan/aub.h @@ -0,0 +1,153 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. */ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ + +/** + * aub_state_struct_type enum values are encoded with the top 16 bits + * representing the type to be delivered to the .aub file, and the bottom 16 + * bits representing the subtype. This macro performs the encoding. + */ +#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) + +enum aub_state_struct_type { + AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), + AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), + AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), + AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), + AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), + AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), + AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), + AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), + AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), + AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), + AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), + AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), + AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), + + AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), + AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), + AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), + + AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), + AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), + AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), + AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), + AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), +}; + +#undef ENCODE_SS_TYPE + +/** + * Decode a aub_state_struct_type value to determine the type that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) +{ + return (ss_type & 0xFFFF0000) >> 16; +} + +/** + * Decode a state_struct_type value to determine the subtype that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) +{ + return ss_type & 0xFFFF; +} + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp new file mode 100644 index 00000000000..d7428d8a877 --- /dev/null +++ b/src/vulkan/compiler.cpp @@ -0,0 +1,931 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#include <brw_context.h> +#include <brw_wm.h> /* brw_new_shader_program is here */ + +#include <brw_vs.h> +#include <brw_gs.h> + +#include <mesa/main/shaderobj.h> +#include <mesa/main/fbobject.h> +#include <mesa/program/program.h> +#include <glsl/program.h> + +#include "private.h" + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static VkResult +set_binding_table_layout(struct brw_stage_prog_data *prog_data, + struct anv_pipeline *pipeline, uint32_t stage) +{ + uint32_t count, bias, set, *map; + + struct anv_pipeline_layout_entry *entries; + + if (stage == VK_SHADER_STAGE_FRAGMENT) + bias = MAX_RTS; + else + bias = 0; + + count = pipeline->layout->stage[stage].count; + entries = pipeline->layout->stage[stage].entries; + + prog_data->map_entries = + (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); + if (prog_data->map_entries == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + set = 0; + map = prog_data->map_entries; + for (uint32_t i = 0; i < count; i++) { + if (entries[i].set == set) { + prog_data->bind_map[set] = map; + set++; + } + *map++ = bias + i; + } + + return VK_SUCCESS; +} + +static void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_program *prog = (struct gl_program *) vp; + + memset(key, 0, sizeof(*key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key->base.program_string_id = vp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + vp->program.Base.UsesClipDistanceOut); + + /* _NEW_POLYGON */ + if (brw->gen < 6) { + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + } + + if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | + VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { + /* _NEW_LIGHT | _NEW_BUFFERS */ + key->clamp_vertex_color = ctx->Light._ClampVertexColor; + } + + /* _NEW_POINT */ + if (brw->gen < 6 && ctx->Point.PointSprite) { + for (int i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key->point_coord_replace |= (1 << i); + } + } + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, + &key->base.tex); +} + +static bool +really_do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_compile c; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base; + void *mem_ctx; + struct gl_shader *vs = NULL; + + if (prog) + vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + memset(prog_data, 0, sizeof(*prog_data)); + + mem_ctx = ralloc_context(NULL); + + c.vp = vp; + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (vs) { + /* We add padding around uniform values below vec4 size, with the worst + * case being a float value that gets blown up to a vec4, so be + * conservative here. + */ + param_count = vs->num_uniform_components * 4; + + } else { + param_count = vp->program.Base.Parameters->NumParameters * 4; + } + /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip + * planes as uniforms. + */ + param_count += c.key.base.nr_userclip_plane_consts * 4; + + /* Setting nr_params here NOT to the size of the param and pull_param + * arrays, but to the number of uniform components vec4_visitor + * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. + */ + stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; + if (vs) { + stage_prog_data->nr_params += vs->num_samplers; + } + + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; + prog_data->inputs_read = vp->program.Base.InputsRead; + + if (c.key.copy_edgeflag) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); + prog_data->inputs_read |= VERT_BIT_EDGEFLAG; + } + + if (brw->gen < 6) { + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (int i = 0; i < 8; i++) { + if (c.key.point_coord_replace & (1 << i)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); + } + + /* if back colors are written, allocate slots for front colors too */ + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + + /* In order for legacy clipping to work, we need to populate the clip + * distance varying slots whenever clipping is enabled, even if the vertex + * shader doesn't write to gl_ClipDistance. + */ + if (c.key.base.userclip_active) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + } + + brw_compute_vue_map(brw->intelScreen->devinfo, + &prog_data->base.vue_map, outputs_written); +\ + set_binding_table_layout(&prog_data->base.base, pipeline, + VK_SHADER_STAGE_VERTEX); + + /* Emit GEN4 code. + */ + program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + pipeline->vs_simd8 = pipeline->program_next; + memcpy((char *) pipeline->device->instruction_block_pool.map + + pipeline->vs_simd8, program, program_size); + + pipeline->program_next = align(pipeline->program_next + program_size, 64); + + ralloc_free(mem_ctx); + + if (stage_prog_data->total_scratch > 0) + if (!anv_bo_init_new(&pipeline->vs_scratch_bo, + pipeline->device, + stage_prog_data->total_scratch)) + return false; + + + return true; +} + +void brw_wm_populate_key(struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct gl_program *prog = (struct gl_program *) brw->fragment_program; + GLuint lookup = 0; + GLuint line_aa; + bool program_uses_dfdy = fp->program.UsesDFdy; + struct gl_framebuffer draw_buffer; + bool multisample_fbo; + + memset(key, 0, sizeof(*key)); + + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. */ + key->tex.swizzles[i] = SWIZZLE_XYZW; + } + + /* A non-zero framebuffer name indicates that the framebuffer was created by + * the user rather than the window system. */ + draw_buffer.Name = 1; + draw_buffer.Visual.samples = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer.Width = 400; + draw_buffer.Height = 400; + ctx->DrawBuffer = &draw_buffer; + + multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + + /* Build the index for table lookup + */ + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + key->line_aa = line_aa; + + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = ctx->Color._ClampFragmentColor; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, + &key->tex); + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. + */ + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + } + + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. */ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. + */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } + + /* The unique fragment program ID */ + key->program_string_id = fp->id; + + ctx->DrawBuffer = NULL; +} + +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) +{ + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + +static bool +really_do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct gl_shader *fs = NULL; + unsigned int program_size; + const uint32_t *program; + uint32_t offset; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + memset(prog_data, 0, sizeof(*prog_data)); + + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. + */ + prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; + + prog_data->computed_depth_mode = computed_depth_mode(&fp->program); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (fs) { + param_count = fs->num_uniform_components; + } else { + param_count = fp->program.Base.Parameters->NumParameters * 4; + } + /* The backend also sometimes adds params for texture size. */ + param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + prog_data->base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.nr_params = param_count; + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw, key->flat_shade, + key->persample_shading, + &fp->program); + + set_binding_table_layout(&prog_data->base, pipeline, + VK_SHADER_STAGE_FRAGMENT); + /* This needs to come after shader time and pull constant entries, but we + * don't have those set up now, so just put it after the layout entries. + */ + prog_data->binding_table.render_target_start = 0; + + program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + offset = pipeline->program_next; + pipeline->program_next = align(pipeline->program_next + program_size, 64); + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + else + pipeline->ps_simd16 = NO_KERNEL; + + memcpy((char *) pipeline->device->instruction_block_pool.map + + offset, program, program_size); + + ralloc_free(mem_ctx); + + if (prog_data->base.total_scratch > 0) + if (!anv_bo_init_new(&pipeline->ps_scratch_bo, + pipeline->device, + prog_data->base.total_scratch)) + return false; + + return true; +} + +static void +brw_gs_populate_key(struct brw_context *brw, + struct anv_pipeline *pipeline, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_stage_state *stage_state = &brw->gs.base; + struct gl_program *prog = &gp->program.Base; + + memset(key, 0, sizeof(*key)); + + key->base.program_string_id = gp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + gp->program.Base.UsesClipDistanceOut); + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, + &key->base.tex); + + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + + /* BRW_NEW_VUE_MAP_VS */ + key->input_varyings = prog_data->base.vue_map.slots_valid; +} + +static bool +really_do_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct brw_gs_compile_output output; + uint32_t offset; + + /* FIXME: We pass the bind map to the compile in the output struct. Need + * something better. */ + set_binding_table_layout(&output.prog_data.base.base, + pipeline, VK_SHADER_STAGE_GEOMETRY); + + brw_compile_gs_prog(brw, prog, gp, key, &output); + + offset = pipeline->program_next; + pipeline->program_next = align(pipeline->program_next + output.program_size, 64); + + pipeline->gs_vec4 = offset; + pipeline->gs_vertex_count = gp->program.VerticesIn; + + memcpy((char *) pipeline->device->instruction_block_pool.map + + offset, output.program, output.program_size); + + ralloc_free(output.mem_ctx); + + if (output.prog_data.base.base.total_scratch) { + if (!anv_bo_init_new(&pipeline->gs_scratch_bo, + pipeline->device, + output.prog_data.base.base.total_scratch)) + return false; + } + + memcpy(&pipeline->gs_prog_data, &output.prog_data, sizeof pipeline->gs_prog_data); + + return true; +} + +static void +fail_on_compile_error(int status, const char *msg) +{ + int source, line, column; + char error[256]; + + if (status) + return; + + if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4) + fail_if(!status, "%d:%s\n", line, error); + else + fail_if(!status, "%s\n", msg); +} + +struct anv_compiler { + struct intel_screen *screen; + struct brw_context *brw; +}; + + +extern "C" { + +struct anv_compiler * +anv_compiler_create(int fd) +{ + struct anv_compiler *compiler; + + compiler = (struct anv_compiler *) malloc(sizeof *compiler); + if (compiler == NULL) + return NULL; + + compiler->screen = intel_screen_create(fd); + if (compiler->screen == NULL) { + free(compiler); + return NULL; + } + + compiler->brw = intel_context_create(compiler->screen); + if (compiler->brw == NULL) { + free(compiler); + return NULL; + } + + compiler->brw->precompile = false; + + return compiler; +} + +void +anv_compiler_destroy(struct anv_compiler *compiler) +{ + intel_context_destroy(compiler->brw); + intel_screen_destroy(compiler->screen); + free(compiler); +} + +/* From gen7_urb.c */ + +/* FIXME: Add to struct intel_device_info */ + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + unsigned vs_size = pipeline->vs_prog_data.base.urb_entry_size; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->gs_vec4 != NO_KERNEL; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static const struct { + uint32_t token; + const char *name; +} stage_info[] = { + { GL_VERTEX_SHADER, "vertex" }, + { GL_TESS_CONTROL_SHADER, "tess control" }, + { GL_TESS_EVALUATION_SHADER, "tess evaluation" }, + { GL_GEOMETRY_SHADER, "geometry" }, + { GL_FRAGMENT_SHADER, "fragment" }, + { GL_COMPUTE_SHADER, "compute" }, +}; + +static void +anv_compile_shader(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct gl_shader *shader; + int name = 0; + + shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); + shader->Source = strdup(pipeline->shaders[stage]->data); + _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); + fail_on_compile_error(shader->CompileStatus, shader->InfoLog); + + program->Shaders[program->NumShaders] = shader; + program->NumShaders++; +} + +int +anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) +{ + struct gl_shader_program *program; + int name = 0; + struct brw_context *brw = compiler->brw; + struct anv_device *device = pipeline->device; + + brw->use_rep_send = pipeline->use_repclear; + brw->no_simd8 = pipeline->use_repclear; + + program = brw->ctx.Driver.NewShaderProgram(name); + program->Shaders = (struct gl_shader **) + calloc(VK_NUM_SHADER_STAGE, sizeof(struct gl_shader *)); + fail_if(program == NULL || program->Shaders == NULL, + "failed to create program\n"); + + /* FIXME: Only supports vs and fs combo at the moment */ + assert(pipeline->shaders[VK_SHADER_STAGE_VERTEX]); + assert(pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]); + + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_VERTEX); + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_FRAGMENT); + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) + anv_compile_shader(compiler, program, pipeline, VK_SHADER_STAGE_GEOMETRY); + + _mesa_glsl_link_shader(&brw->ctx, program); + fail_on_compile_error(program->LinkStatus, + program->InfoLog); + + pipeline->program_block = + anv_block_pool_alloc(&device->instruction_block_pool); + pipeline->program_next = pipeline->program_block; + + + bool success; + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_FRAGMENT] = &pipeline->wm_prog_data.base; + + + struct brw_vs_prog_key vs_key; + struct gl_vertex_program *vp = (struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_VERTEX] = &pipeline->vs_prog_data.base.base; + + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { + struct brw_gs_prog_key gs_key; + struct gl_geometry_program *gp = (struct gl_geometry_program *) + program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; + struct brw_geometry_program *bgp = brw_geometry_program(gp); + + brw_gs_populate_key(brw, pipeline, bgp, &gs_key); + + success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); + fail_if(!success, "do_gs_prog failed\n"); + pipeline->active_stages = VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + pipeline->prog_data[VK_SHADER_STAGE_GEOMETRY] = &pipeline->gs_prog_data.base.base; + + } else { + pipeline->gs_vec4 = NO_KERNEL; + pipeline->active_stages = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + } + + + /* FIXME: Allocate more blocks if we fill up this one and worst case, + * allocate multiple continuous blocks from end of pool to hold really big + * programs. */ + assert(pipeline->program_next - pipeline->program_block < 8192); + + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + + gen7_compute_urb_partition(pipeline); + + return 0; +} + +/* This badly named function frees the struct anv_pipeline data that the compiler + * allocates. Currently just the prog_data structs. + */ +void +anv_compiler_free(struct anv_pipeline *pipeline) +{ + struct anv_device *device = pipeline->device; + + for (uint32_t stage = 0; stage < VK_NUM_SHADER_STAGE; stage++) + if (pipeline->prog_data[stage]) + free(pipeline->prog_data[stage]->map_entries); + + anv_block_pool_free(&device->instruction_block_pool, + pipeline->program_block); +} + +} diff --git a/src/vulkan/device.c b/src/vulkan/device.c new file mode 100644 index 00000000000..09b21e50c7c --- /dev/null +++ b/src/vulkan/device.c @@ -0,0 +1,2634 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +static int +anv_env_get_int(const char *name) +{ + const char *val = getenv(name); + + if (!val) + return 0; + + return strtol(val, NULL, 0); +} + +static VkResult +fill_physical_device(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + int fd; + + fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_error(VK_ERROR_UNAVAILABLE); + + device->instance = instance; + device->path = path; + + device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE"); + device->no_hw = false; + if (device->chipset_id) { + /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */ + device->no_hw = true; + } else { + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + } + if (!device->chipset_id) + goto fail; + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id, -1); + if (!device->info) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + goto fail; + + close(fd); + + return VK_SUCCESS; + + fail: + close(fd); + + return vk_error(VK_ERROR_UNAVAILABLE); +} + +static void *default_alloc( + void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return malloc(size); +} + +static void default_free( + void* pUserData, + void* pMem) +{ + free(pMem); +} + +static const VkAllocCallbacks default_alloc_callbacks = { + .pUserData = NULL, + .pfnAlloc = default_alloc, + .pfnFree = default_free +}; + +VkResult VKAPI vkCreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + VkInstance* pInstance) +{ + struct anv_instance *instance; + const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; + void *user_data = NULL; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + if (pCreateInfo->pAllocCb) { + alloc_callbacks = pCreateInfo->pAllocCb; + user_data = pCreateInfo->pAllocCb->pUserData; + } + instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->pAllocUserData = alloc_callbacks->pUserData; + instance->pfnAlloc = alloc_callbacks->pfnAlloc; + instance->pfnFree = alloc_callbacks->pfnFree; + instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; + + instance->physicalDeviceCount = 0; + result = fill_physical_device(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result == VK_SUCCESS) + instance->physicalDeviceCount++; + + *pInstance = (VkInstance) instance; + + return VK_SUCCESS; +} + +VkResult VKAPI vkDestroyInstance( + VkInstance _instance) +{ + struct anv_instance *instance = (struct anv_instance *) _instance; + + instance->pfnFree(instance->pAllocUserData, instance); + + return VK_SUCCESS; +} + +VkResult VKAPI vkEnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + struct anv_instance *instance = (struct anv_instance *) _instance; + + if (*pPhysicalDeviceCount >= 1) + pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice; + *pPhysicalDeviceCount = instance->physicalDeviceCount; + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetPhysicalDeviceInfo( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceInfoType infoType, + size_t* pDataSize, + void* pData) +{ + struct anv_physical_device *device = (struct anv_physical_device *) physicalDevice; + VkPhysicalDeviceProperties *properties; + VkPhysicalDevicePerformance *performance; + VkPhysicalDeviceQueueProperties *queue_properties; + VkPhysicalDeviceMemoryProperties *memory_properties; + uint64_t ns_per_tick = 80; + + switch (infoType) { + case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES: + properties = pData; + assert(*pDataSize >= sizeof(*properties)); + *pDataSize = sizeof(*properties); /* Assuming we have to return the size of our struct. */ + + properties->apiVersion = 1; + properties->driverVersion = 1; + properties->vendorId = 0x8086; + properties->deviceId = device->chipset_id; + properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; + strcpy(properties->deviceName, device->name); + properties->maxInlineMemoryUpdateSize = 0; + properties->maxBoundDescriptorSets = 0; + properties->maxThreadGroupSize = 0; + properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick; + properties->multiColorAttachmentClears = 0; + properties->maxDescriptorSets = 2; + properties->maxViewports = 16; + properties->maxColorAttachments = 8; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE: + performance = pData; + assert(*pDataSize >= sizeof(*performance)); + *pDataSize = sizeof(*performance); /* Assuming we have to return the size of our struct. */ + + performance->maxDeviceClock = 1.0; + performance->aluPerClock = 1.0; + performance->texPerClock = 1.0; + performance->primsPerClock = 1.0; + performance->pixelsPerClock = 1.0; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES: + queue_properties = pData; + assert(*pDataSize >= sizeof(*queue_properties)); + *pDataSize = sizeof(*queue_properties); + + queue_properties->queueFlags = 0; + queue_properties->queueCount = 1; + queue_properties->maxAtomicCounters = 0; + queue_properties->supportsTimestamps = 0; + queue_properties->maxMemReferences = 0; + return VK_SUCCESS; + + case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES: + memory_properties = pData; + assert(*pDataSize >= sizeof(*memory_properties)); + *pDataSize = sizeof(*memory_properties); + + memory_properties->supportsMigration = false; + memory_properties->supportsPinning = false; + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } + +} + +void * vkGetProcAddr( + VkPhysicalDevice physicalDevice, + const char* pName) +{ + return NULL; +} + +static void +parse_debug_flags(struct anv_device *device) +{ + const char *debug, *p, *end; + + debug = getenv("INTEL_DEBUG"); + device->dump_aub = false; + if (debug) { + for (p = debug; *p; p = end + 1) { + end = strchrnul(p, ','); + if (end - p == 3 && memcmp(p, "aub", 3) == 0) + device->dump_aub = true; + if (end - p == 5 && memcmp(p, "no_hw", 5) == 0) + device->no_hw = true; + if (*end == '\0') + break; + } + } +} + +VkResult VKAPI vkCreateDevice( + VkPhysicalDevice _physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + VkDevice* pDevice) +{ + struct anv_physical_device *physicalDevice = + (struct anv_physical_device *) _physicalDevice; + struct anv_instance *instance = physicalDevice->instance; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + device = instance->pfnAlloc(instance->pAllocUserData, + sizeof(*device), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->no_hw = physicalDevice->no_hw; + parse_debug_flags(device); + + device->instance = physicalDevice->instance; + device->fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC); + if (device->fd == -1) + goto fail_device; + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) + goto fail_fd; + + anv_block_pool_init(&device->dyn_state_block_pool, device, 2048); + + anv_state_pool_init(&device->dyn_state_pool, + &device->dyn_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 2048); + anv_block_pool_init(&device->surface_state_block_pool, device, 2048); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + device->compiler = anv_compiler_create(device->fd); + device->aub_writer = NULL; + + device->info = *physicalDevice->info; + + pthread_mutex_init(&device->mutex, NULL); + + *pDevice = (VkDevice) device; + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_device_free(device, device); + + return vk_error(VK_ERROR_UNAVAILABLE); +} + +VkResult VKAPI vkDestroyDevice( + VkDevice _device) +{ + struct anv_device *device = (struct anv_device *) _device; + + anv_compiler_destroy(device->compiler); + + anv_block_pool_finish(&device->dyn_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + + close(device->fd); + + if (device->aub_writer) + anv_aub_writer_destroy(device->aub_writer); + + anv_device_free(device, device); + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetGlobalExtensionInfo( + VkExtensionInfoType infoType, + uint32_t extensionIndex, + size_t* pDataSize, + void* pData) +{ + uint32_t *count; + + switch (infoType) { + case VK_EXTENSION_INFO_TYPE_COUNT: + count = pData; + assert(*pDataSize == 4); + *count = 0; + return VK_SUCCESS; + + case VK_EXTENSION_INFO_TYPE_PROPERTIES: + return vk_error(VK_ERROR_INVALID_EXTENSION); + + default: + return VK_UNSUPPORTED; + } +} + +VkResult VKAPI vkGetPhysicalDeviceExtensionInfo( + VkPhysicalDevice physicalDevice, + VkExtensionInfoType infoType, + uint32_t extensionIndex, + size_t* pDataSize, + void* pData) +{ + uint32_t *count; + + switch (infoType) { + case VK_EXTENSION_INFO_TYPE_COUNT: + count = pData; + assert(*pDataSize == 4); + *count = 0; + return VK_SUCCESS; + + case VK_EXTENSION_INFO_TYPE_PROPERTIES: + return vk_error(VK_ERROR_INVALID_EXTENSION); + + default: + return VK_UNSUPPORTED; + } +} + +VkResult VKAPI vkEnumerateLayers( + VkPhysicalDevice physicalDevice, + size_t maxStringSize, + size_t* pLayerCount, + char* const* pOutLayers, + void* pReserved) +{ + *pLayerCount = 0; + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_queue *queue; + + /* FIXME: Should allocate these at device create time. */ + + queue = anv_device_alloc(device, sizeof(*queue), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (queue == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + queue->device = device; + queue->pool = &device->surface_state_pool; + + queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); + *(uint32_t *)queue->completed_serial.map = 0; + queue->next_serial = 1; + + *pQueue = (VkQueue) queue; + + return VK_SUCCESS; +} + +static const uint32_t BATCH_SIZE = 8192; + +VkResult +anv_batch_init(struct anv_batch *batch, struct anv_device *device) +{ + VkResult result; + + result = anv_bo_init_new(&batch->bo, device, BATCH_SIZE); + if (result != VK_SUCCESS) + return result; + + batch->bo.map = + anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE); + if (batch->bo.map == NULL) { + anv_gem_close(device, batch->bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + batch->cmd_relocs.num_relocs = 0; + batch->surf_relocs.num_relocs = 0; + batch->next = batch->bo.map; + + return VK_SUCCESS; +} + +void +anv_batch_finish(struct anv_batch *batch, struct anv_device *device) +{ + anv_gem_munmap(batch->bo.map, BATCH_SIZE); + anv_gem_close(device, batch->bo.gem_handle); +} + +void +anv_batch_reset(struct anv_batch *batch) +{ + batch->next = batch->bo.map; + batch->cmd_relocs.num_relocs = 0; + batch->surf_relocs.num_relocs = 0; +} + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + void *p = batch->next; + + batch->next += num_dwords * 4; + + return p; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, + struct anv_reloc_list *other, uint32_t offset) +{ + uint32_t i, count; + + count = list->num_relocs; + memcpy(&list->relocs[count], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[count], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + for (i = 0; i < other->num_relocs; i++) + list->relocs[i + count].offset += offset; + + count += other->num_relocs; +} + +static uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, + uint32_t offset, + struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + assert(list->num_relocs < ANV_BATCH_MAX_RELOCS); + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + + return target_bo->offset + delta; +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->bo.map; + memcpy(batch->next, other->bo.map, size); + + offset = batch->next - batch->bo.map; + anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset); + anv_reloc_list_append(&batch->surf_relocs, &other->surf_relocs, offset); + + batch->next += size; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(&batch->cmd_relocs, + location - batch->bo.map, bo, delta); +} + +VkResult VKAPI vkQueueSubmit( + VkQueue _queue, + uint32_t cmdBufferCount, + const VkCmdBuffer* pCmdBuffers, + VkFence fence) +{ + struct anv_queue *queue = (struct anv_queue *) _queue; + struct anv_device *device = queue->device; + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) pCmdBuffers[0]; + int ret; + + assert(cmdBufferCount == 1); + + if (device->dump_aub) + anv_cmd_buffer_dump(cmd_buffer); + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf); + if (ret != 0) + goto fail; + + for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) + cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset; + } else { + *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; + } + + return VK_SUCCESS; + + fail: + pthread_mutex_unlock(&device->mutex); + + return vk_error(VK_ERROR_UNKNOWN); +} + +VkResult VKAPI vkQueueAddMemReferences( + VkQueue queue, + uint32_t count, + const VkDeviceMemory* pMems) +{ + return VK_SUCCESS; +} + +VkResult vkQueueRemoveMemReferences( + VkQueue queue, + uint32_t count, + const VkDeviceMemory* pMems) +{ + return VK_SUCCESS; +} + +VkResult VKAPI vkQueueWaitIdle( + VkQueue _queue) +{ + struct anv_queue *queue = (struct anv_queue *) _queue; + + return vkDeviceWaitIdle((VkDevice) queue->device); +} + +VkResult VKAPI vkDeviceWaitIdle( + VkDevice _device) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_state state; + struct anv_batch batch; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo *bo = NULL; + VkResult result; + int64_t timeout; + int ret; + + state = anv_state_pool_alloc(&device->dyn_state_pool, 32, 32); + bo = &device->dyn_state_pool.block_pool->bo; + batch.next = state.map; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + exec2_objects[0].handle = bo->gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = state.offset; + execbuf.batch_len = batch.next - state.map; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo->gem_handle, &timeout); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + } + + anv_state_pool_free(&device->dyn_state_pool, state); + + return VK_SUCCESS; + + fail: + anv_state_pool_free(&device->dyn_state_pool, state); + + return result; +} + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return device->instance->pfnAlloc(device->instance->pAllocUserData, + size, + alignment, + allocType); +} + +void +anv_device_free(struct anv_device * device, + void * mem) +{ + return device->instance->pfnFree(device->instance->pAllocUserData, + mem); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + + return VK_SUCCESS; +} + +VkResult VKAPI vkAllocMemory( + VkDevice _device, + const VkMemoryAllocInfo* pAllocInfo, + VkDeviceMemory* pMem) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize); + if (result != VK_SUCCESS) + goto fail; + + *pMem = (VkDeviceMemory) mem; + + return VK_SUCCESS; + + fail: + anv_device_free(device, mem); + + return result; +} + +VkResult VKAPI vkFreeMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_device_free(device, mem); + + return VK_SUCCESS; +} + +VkResult VKAPI vkSetMemoryPriority( + VkDevice device, + VkDeviceMemory mem, + VkMemoryPriority priority) +{ + return VK_SUCCESS; +} + +VkResult VKAPI vkMapMemory( + VkDevice _device, + VkDeviceMemory _mem, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. */ + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size); + mem->map_size = size; + + *ppData = mem->map; + + return VK_SUCCESS; +} + +VkResult VKAPI vkUnmapMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + anv_gem_munmap(mem->map, mem->map_size); + + return VK_SUCCESS; +} + +VkResult VKAPI vkFlushMappedMemory( + VkDevice device, + VkDeviceMemory mem, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* clflush here for !llc platforms */ + + return VK_SUCCESS; +} + +VkResult VKAPI vkPinSystemMemory( + VkDevice device, + const void* pSysMem, + size_t memSize, + VkDeviceMemory* pMem) +{ + return VK_SUCCESS; +} + +VkResult VKAPI vkGetMultiDeviceCompatibility( + VkPhysicalDevice physicalDevice0, + VkPhysicalDevice physicalDevice1, + VkPhysicalDeviceCompatibilityInfo* pInfo) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenSharedMemory( + VkDevice device, + const VkMemoryOpenInfo* pOpenInfo, + VkDeviceMemory* pMem) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenSharedSemaphore( + VkDevice device, + const VkSemaphoreOpenInfo* pOpenInfo, + VkSemaphore* pSemaphore) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenPeerMemory( + VkDevice device, + const VkPeerMemoryOpenInfo* pOpenInfo, + VkDeviceMemory* pMem) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkOpenPeerImage( + VkDevice device, + const VkPeerImageOpenInfo* pOpenInfo, + VkImage* pImage, + VkDeviceMemory* pMem) +{ + return VK_UNSUPPORTED; +} + +static VkResult +anv_instance_destructor(struct anv_device * device, + VkObject object) +{ + return vkDestroyInstance(object); +} + +static VkResult +anv_noop_destructor(struct anv_device * device, + VkObject object) +{ + return VK_SUCCESS; +} + +static VkResult +anv_device_destructor(struct anv_device * device, + VkObject object) +{ + return vkDestroyDevice(object); +} + +static VkResult +anv_cmd_buffer_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_batch_finish(&cmd_buffer->batch, device); + anv_device_free(device, cmd_buffer->exec2_objects); + anv_device_free(device, cmd_buffer->exec2_bos); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_pipeline *pipeline = (struct anv_pipeline *) object; + + return anv_pipeline_destroy(pipeline); +} + +static VkResult +anv_free_destructor(struct anv_device * device, + VkObject object) +{ + anv_device_free(device, (void *) object); + + return VK_SUCCESS; +} + +static VkResult (*anv_object_destructors[])(struct anv_device *device, + VkObject object) = { + [VK_OBJECT_TYPE_INSTANCE] = anv_instance_destructor, + [VK_OBJECT_TYPE_PHYSICAL_DEVICE] = anv_noop_destructor, + [VK_OBJECT_TYPE_DEVICE] = anv_device_destructor, + [VK_OBJECT_TYPE_QUEUE] = anv_noop_destructor, + [VK_OBJECT_TYPE_COMMAND_BUFFER] = anv_cmd_buffer_destructor, + [VK_OBJECT_TYPE_PIPELINE] = anv_pipeline_destructor, + [VK_OBJECT_TYPE_SHADER] = anv_free_destructor, + [VK_OBJECT_TYPE_BUFFER] = anv_free_destructor, + [VK_OBJECT_TYPE_IMAGE] = anv_free_destructor, + [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor +}; + +VkResult VKAPI vkDestroyObject( + VkDevice _device, + VkObjectType objType, + VkObject object) +{ + struct anv_device *device = (struct anv_device *) _device; + + assert(objType < ARRAY_SIZE(anv_object_destructors) && + anv_object_destructors[objType] != NULL); + + return anv_object_destructors[objType](device, object); +} + +static void +fill_memory_requirements( + VkObjectType objType, + VkObject object, + VkMemoryRequirements * memory_requirements) +{ + struct anv_buffer *buffer; + struct anv_image *image; + + memory_requirements->memPropsAllowed = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT | + /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */ + VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT | + VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL | + VK_MEMORY_PROPERTY_SHAREABLE_BIT; + + memory_requirements->memPropsRequired = 0; + + switch (objType) { + case VK_OBJECT_TYPE_BUFFER: + buffer = (struct anv_buffer *) object; + memory_requirements->size = buffer->size; + memory_requirements->alignment = 16; + break; + case VK_OBJECT_TYPE_IMAGE: + image = (struct anv_image *) object; + memory_requirements->size = image->size; + memory_requirements->alignment = image->alignment; + break; + default: + memory_requirements->size = 0; + break; + } +} + +VkResult VKAPI vkGetObjectInfo( + VkDevice _device, + VkObjectType objType, + VkObject object, + VkObjectInfoType infoType, + size_t* pDataSize, + void* pData) +{ + VkMemoryRequirements memory_requirements; + + switch (infoType) { + case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS: + fill_memory_requirements(objType, object, &memory_requirements); + memcpy(pData, &memory_requirements, + MIN2(*pDataSize, sizeof(memory_requirements))); + *pDataSize = sizeof(memory_requirements); + return VK_SUCCESS; + + case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT: + default: + return VK_UNSUPPORTED; + } + +} + +VkResult VKAPI vkQueueBindObjectMemory( + VkQueue queue, + VkObjectType objType, + VkObject object, + uint32_t allocationIdx, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + struct anv_buffer *buffer; + struct anv_image *image; + struct anv_device_memory *mem = (struct anv_device_memory *) _mem; + + switch (objType) { + case VK_OBJECT_TYPE_BUFFER: + buffer = (struct anv_buffer *) object; + buffer->mem = mem; + buffer->offset = memOffset; + break; + case VK_OBJECT_TYPE_IMAGE: + image = (struct anv_image *) object; + image->mem = mem; + image->offset = memOffset; + break; + default: + break; + } + + return VK_SUCCESS; +} + +VkResult VKAPI vkQueueBindObjectMemoryRange( + VkQueue queue, + VkObjectType objType, + VkObject object, + uint32_t allocationIdx, + VkDeviceSize rangeOffset, + VkDeviceSize rangeSize, + VkDeviceMemory mem, + VkDeviceSize memOffset) +{ + return VK_UNSUPPORTED; +} + +VkResult vkQueueBindImageMemoryRange( + VkQueue queue, + VkImage image, + uint32_t allocationIdx, + const VkImageMemoryBindInfo* pBindInfo, + VkDeviceMemory mem, + VkDeviceSize memOffset) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkCreateFence( + VkDevice device, + const VkFenceCreateInfo* pCreateInfo, + VkFence* pFence) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkResetFences( + VkDevice device, + uint32_t fenceCount, + VkFence* pFences) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkGetFenceStatus( + VkDevice device, + VkFence fence) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkWaitForFences( + VkDevice device, + uint32_t fenceCount, + const VkFence* pFences, + bool32_t waitAll, + uint64_t timeout) +{ + return VK_UNSUPPORTED; +} + +// Queue semaphore functions + +VkResult VKAPI vkCreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + VkSemaphore* pSemaphore) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkQueueSignalSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkQueueWaitSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + return VK_UNSUPPORTED; +} + +// Event functions + +VkResult VKAPI vkCreateEvent( + VkDevice device, + const VkEventCreateInfo* pCreateInfo, + VkEvent* pEvent) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkGetEventStatus( + VkDevice device, + VkEvent event) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkSetEvent( + VkDevice device, + VkEvent event) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkResetEvent( + VkDevice device, + VkEvent event) +{ + return VK_UNSUPPORTED; +} + +// Query functions + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult VKAPI vkCreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + VkQueryPool* pQueryPool) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_query_pool *pool; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pool->type = pCreateInfo->queryType; + result = anv_bo_init_new(&pool->bo, device, pCreateInfo->slots * 16); + if (result != VK_SUCCESS) + goto fail; + + *pQueryPool = (VkQueryPool) pool; + + return VK_SUCCESS; + + fail: + anv_device_free(device, pool); + + return result; +} + +VkResult VKAPI vkGetQueryPoolResults( + VkDevice device, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + size_t* pDataSize, + void* pData, + VkQueryResultFlags flags) +{ + return VK_UNSUPPORTED; +} + +// Format capabilities + +VkResult VKAPI vkGetFormatInfo( + VkDevice device, + VkFormat format, + VkFormatInfoType infoType, + size_t* pDataSize, + void* pData) +{ + return VK_UNSUPPORTED; +} + +// Buffer functions + +VkResult VKAPI vkCreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + VkBuffer* pBuffer) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_device_alloc(device, sizeof(*buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->mem = NULL; + buffer->offset = 0; + + *pBuffer = (VkBuffer) buffer; + + return VK_SUCCESS; +} + +// Buffer view functions + +VkResult VKAPI vkCreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_buffer_view *view; + const struct anv_format *format; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->buffer = (struct anv_buffer *) pCreateInfo->buffer; + view->offset = pCreateInfo->offset; + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + format = anv_format_for_vk_format(pCreateInfo->format); + /* This assumes RGBA float format. */ + uint32_t stride = 4; + uint32_t num_elements = pCreateInfo->range / stride; + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = format->format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = 0, /* FIXME: MOCS */ + .BaseMipLevel = 0, + .SurfaceQPitch = 0, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0, + /* FIXME: We assume that the image must be bound at this time. */ + .SurfaceBaseAddress = { NULL, view->buffer->offset + view->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); + + *pView = (VkImageView) view; + + return VK_SUCCESS; +} + +// Sampler functions + +struct anv_sampler { + uint32_t state[4]; +}; + +VkResult VKAPI vkCreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_SAMPLER_STATE sampler_state = { + .SamplerDisable = 0, + .TextureBorderColorMode = 0, + .LODPreClampMode = 0, + .BaseMipLevel = 0, + .MipModeFilter = 0, + .MagModeFilter = 0, + .MinModeFilter = 0, + .TextureLODBias = 0, + .AnisotropicAlgorithm = 0, + .MinLOD = 0, + .MaxLOD = 0, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = 0, + .CubeSurfaceControlMode = 0, + .IndirectStatePointer = 0, + .LODClampMagnificationMode = 0, + .MaximumAnisotropy = 0, + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = 0, + .TCYAddressControlMode = 0, + .TCZAddressControlMode = 0, + }; + + GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = (VkSampler) sampler; + + return VK_SUCCESS; +} + +// Descriptor set functions + +VkResult VKAPI vkCreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + VkDescriptorSetLayout* pSetLayout) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_descriptor_set_layout *set_layout; + uint32_t count, k; + size_t size, total; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + count = 0; + for (uint32_t i = 0; i < pCreateInfo->count; i++) + count += pCreateInfo->pBinding[i].count; + + size = sizeof(*set_layout) + + count * sizeof(set_layout->bindings[0]); + set_layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + k = 0; + total = 0; + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) { + set_layout->bindings[k].mask = pCreateInfo->pBinding[i].stageFlags; + set_layout->bindings[k].type = pCreateInfo->pBinding[i].descriptorType; + k++; + } + + total += pCreateInfo->pBinding[i].count * + __builtin_popcount(pCreateInfo->pBinding[i].stageFlags); + } + + set_layout->total = total; + set_layout->count = count; + + *pSetLayout = (VkDescriptorSetLayout) set_layout; + + return VK_SUCCESS; +} + +VkResult VKAPI vkBeginDescriptorPoolUpdate( + VkDevice device, + VkDescriptorUpdateMode updateMode) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkEndDescriptorPoolUpdate( + VkDevice device, + VkCmdBuffer cmd) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkCreateDescriptorPool( + VkDevice device, + VkDescriptorPoolUsage poolUsage, + uint32_t maxSets, + const VkDescriptorPoolCreateInfo* pCreateInfo, + VkDescriptorPool* pDescriptorPool) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkAllocDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorSetUsage setUsage, + uint32_t count, + const VkDescriptorSetLayout* pSetLayouts, + VkDescriptorSet* pDescriptorSets, + uint32_t* pCount) +{ + struct anv_device *device = (struct anv_device *) _device; + const struct anv_descriptor_set_layout *layout; + struct anv_descriptor_set *set; + size_t size; + + for (uint32_t i = 0; i < count; i++) { + layout = (struct anv_descriptor_set_layout *) pSetLayouts[i]; + size = sizeof(*set) + layout->total * sizeof(set->descriptors[0]); + set = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set) { + *pCount = i; + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pDescriptorSets[i] = (VkDescriptorSet) set; + } + + *pCount = count; + + return VK_UNSUPPORTED; +} + +void VKAPI vkClearDescriptorSets( + VkDevice device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ +} + +void VKAPI vkUpdateDescriptors( + VkDevice _device, + VkDescriptorSet descriptorSet, + uint32_t updateCount, + const void** ppUpdateArray) +{ + struct anv_descriptor_set *set = (struct anv_descriptor_set *) descriptorSet; + VkUpdateSamplers *update_samplers; + VkUpdateSamplerTextures *update_sampler_textures; + VkUpdateImages *update_images; + VkUpdateBuffers *update_buffers; + VkUpdateAsCopy *update_as_copy; + + for (uint32_t i = 0; i < updateCount; i++) { + const struct anv_common *common = ppUpdateArray[i]; + + switch (common->sType) { + case VK_STRUCTURE_TYPE_UPDATE_SAMPLERS: + update_samplers = (VkUpdateSamplers *) common; + + for (uint32_t j = 0; j < update_samplers->count; j++) { + set->descriptors[update_samplers->binding + j] = + (void *) update_samplers->pSamplers[j]; + } + break; + + case VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES: + /* FIXME: Shouldn't this be *_UPDATE_SAMPLER_IMAGES? */ + update_sampler_textures = (VkUpdateSamplerTextures *) common; + + for (uint32_t j = 0; j < update_sampler_textures->count; j++) { + set->descriptors[update_sampler_textures->binding + j] = + (void *) update_sampler_textures->pSamplerImageViews[j].pImageView->view; + } + break; + + case VK_STRUCTURE_TYPE_UPDATE_IMAGES: + update_images = (VkUpdateImages *) common; + + for (uint32_t j = 0; j < update_images->count; j++) { + set->descriptors[update_images->binding + j] = + (void *) update_images->pImageViews[j].view; + } + break; + + case VK_STRUCTURE_TYPE_UPDATE_BUFFERS: + update_buffers = (VkUpdateBuffers *) common; + + for (uint32_t j = 0; j < update_buffers->count; j++) { + set->descriptors[update_buffers->binding + j] = + (void *) update_buffers->pBufferViews[j].view; + } + /* FIXME: descriptor arrays? */ + break; + + case VK_STRUCTURE_TYPE_UPDATE_AS_COPY: + update_as_copy = (VkUpdateAsCopy *) common; + (void) update_as_copy; + break; + + default: + break; + } + } +} + +// State object functions + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +VkResult VKAPI vkCreateDynamicViewportState( + VkDevice _device, + const VkDynamicVpStateCreateInfo* pCreateInfo, + VkDynamicVpState* pState) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_vp_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + unsigned count = pCreateInfo->viewportAndScissorCount; + state->sf_clip_vp = anv_state_pool_alloc(&device->dyn_state_pool, + count * 64, 64); + state->cc_vp = anv_state_pool_alloc(&device->dyn_state_pool, + count * 8, 32); + state->scissor = anv_state_pool_alloc(&device->dyn_state_pool, + count * 32, 32); + + for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { + const VkViewport *vp = &pCreateInfo->pViewports[i]; + const VkRect *s = &pCreateInfo->pScissors[i]; + + struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->originX + vp->width / 2, + .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->originX, + .XMaxViewPort = vp->originX + vp->width - 1, + .YMinViewPort = vp->originY, + .YMaxViewPort = vp->originY + vp->height - 1, + }; + + struct GEN8_CC_VIEWPORT cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN8_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN8_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. */ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); + GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport); + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); + } else { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); + } + } + + *pState = (VkDynamicVpState) state; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDynamicRasterState( + VkDevice _device, + const VkDynamicRsStateCreateInfo* pCreateInfo, + VkDynamicRsState* pState) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Missing these: + * float depthBias; + * float depthBiasClamp; + * float slopeScaledDepthBias; + * float pointFadeThreshold; + * // optional (GL45) - Size of point fade threshold + */ + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + .PointWidth = pCreateInfo->pointSize, + }; + + GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + + *pState = (VkDynamicRsState) state; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDynamicColorBlendState( + VkDevice _device, + const VkDynamicCbStateCreateInfo* pCreateInfo, + VkDynamicCbState* pState) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_dynamic_cb_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *pState = (VkDynamicCbState) state; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDynamicDepthStencilState( + VkDevice device, + const VkDynamicDsStateCreateInfo* pCreateInfo, + VkDynamicDsState* pState) +{ + return VK_UNSUPPORTED; +} + +// Command buffer functions + +VkResult VKAPI vkCreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->device = device; + + result = anv_batch_init(&cmd_buffer->batch, device); + if (result != VK_SUCCESS) + goto fail; + + cmd_buffer->exec2_objects = + anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer->exec2_objects == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_batch; + } + + cmd_buffer->exec2_bos = + anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_bos[0]), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer->exec2_bos == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_exec2_objects; + } + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + + cmd_buffer->dirty = 0; + cmd_buffer->vb_dirty = 0; + + *pCmdBuffer = (VkCmdBuffer) cmd_buffer; + + return VK_SUCCESS; + + fail_exec2_objects: + anv_device_free(device, cmd_buffer->exec2_objects); + fail_batch: + anv_batch_finish(&cmd_buffer->batch, device); + fail: + anv_device_free(device, cmd_buffer); + + return result; +} + +VkResult VKAPI vkBeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_device *device = cmd_buffer->device; + + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_SIP); + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { NULL, 0 }, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { &device->surface_state_block_pool.bo, 0 }, + .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */ + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dyn_state_block_pool.bo, 0 }, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = true); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + + /* Hardcoded state: */ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .Width = 1, + .Height = 1, + .SurfaceFormat = D16_UNORM, + .SurfaceBaseAddress = { NULL, 0 }, + .HierarchicalDepthBufferEnable = 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_WM_DEPTH_STENCIL, + .DepthTestEnable = false, + .DepthBufferWriteEnable = false); + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, struct anv_reloc_list *list) +{ + struct drm_i915_gem_exec_object2 *obj; + + bo->index = cmd_buffer->bo_count; + obj = &cmd_buffer->exec2_objects[bo->index]; + cmd_buffer->exec2_bos[bo->index] = bo; + cmd_buffer->bo_count++; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + + if (list) { + obj->relocation_count = list->num_relocs; + obj->relocs_ptr = (uintptr_t) list->relocs; + } +} + +static void +anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo, *batch_bo; + + batch_bo = &cmd_buffer->batch.bo; + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + /* Skip any relocations targeting the batch bo. We need to make sure + * it's the last in the list so we'll add it manually later. + */ + if (bo == batch_bo) + continue; + if (bo->index < cmd_buffer->bo_count && cmd_buffer->exec2_bos[bo->index] == bo) + continue; + + anv_cmd_buffer_add_bo(cmd_buffer, bo, NULL); + } +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +VkResult VKAPI vkEndCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_device *device = cmd_buffer->device; + struct anv_batch *batch = &cmd_buffer->batch; + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((batch->next - batch->bo.map) & 4) + anv_batch_emit(batch, GEN8_MI_NOOP); + + cmd_buffer->bo_count = 0; + cmd_buffer->need_reloc = false; + + /* Lock for access to bo->index. */ + pthread_mutex_lock(&device->mutex); + + /* Add block pool bos first so we can add them with their relocs. */ + anv_cmd_buffer_add_bo(cmd_buffer, &device->surface_state_block_pool.bo, + &batch->surf_relocs); + + anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->surf_relocs); + anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &batch->surf_relocs); + anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs); + + cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; + cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count; + cmd_buffer->execbuf.batch_start_offset = 0; + cmd_buffer->execbuf.batch_len = batch->next - batch->bo.map; + cmd_buffer->execbuf.cliprects_ptr = 0; + cmd_buffer->execbuf.num_cliprects = 0; + cmd_buffer->execbuf.DR1 = 0; + cmd_buffer->execbuf.DR4 = 0; + + cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT; + if (!cmd_buffer->need_reloc) + cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC; + cmd_buffer->execbuf.flags |= I915_EXEC_RENDER; + cmd_buffer->execbuf.rsvd1 = device->context_id; + cmd_buffer->execbuf.rsvd2 = 0; + + pthread_mutex_unlock(&device->mutex); + + return VK_SUCCESS; +} + +VkResult VKAPI vkResetCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + anv_batch_reset(&cmd_buffer->batch); + + return VK_SUCCESS; +} + +// Command buffer building functions + +void VKAPI vkCmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + cmd_buffer->pipeline = (struct anv_pipeline *) _pipeline; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; +} + +void VKAPI vkCmdBindDynamicStateObject( + VkCmdBuffer cmdBuffer, + VkStateBindPoint stateBindPoint, + VkDynamicStateObject dynamicState) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_dynamic_vp_state *vp_state; + + switch (stateBindPoint) { + case VK_STATE_BIND_POINT_VIEWPORT: + vp_state = (struct anv_dynamic_vp_state *) dynamicState; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + break; + case VK_STATE_BIND_POINT_RASTER: + cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState; + cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY; + break; + case VK_STATE_BIND_POINT_COLOR_BLEND: + case VK_STATE_BIND_POINT_DEPTH_STENCIL: + break; + default: + break; + }; +} + +void VKAPI vkCmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + /* What are the semantics for setting descriptor sets? Assuming that + * setting preserves lower sets and invalidate higher sets. This means that + * we can set the number of active sets to firstSet + setCount. + */ + + for (uint32_t i = 0; i < setCount; i++) + cmd_buffer->descriptor_sets[firstSet + i] = + (struct anv_descriptor_set *) pDescriptorSets[i]; + + cmd_buffer->num_descriptor_sets = firstSet + setCount; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; +} + +void VKAPI vkCmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT8] = INDEX_BYTE, + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = 0, + .BufferStartingAddress = { &buffer->mem->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} + +void VKAPI vkCmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + for (uint32_t i = 0; i < bindingCount; i++) { + cmd_buffer->vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i]; + cmd_buffer->vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->vb_dirty |= 1 << (startBinding + i); + } +} + +static void +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_framebuffer *framebuffer = cmd_buffer->framebuffer; + + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + + uint32_t bias = s == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; + uint32_t count, *table; + struct anv_state table_state; + + if (layout) + count = layout->stage[s].count + bias; + else if (s == VK_SHADER_STAGE_FRAGMENT) + count = framebuffer->color_attachment_count; + else + count = 0; + + if (count == 0) + continue; + + table_state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, + count * 4, 32); + table = table_state.map; + + if (s == VK_SHADER_STAGE_FRAGMENT) { + for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { + struct anv_color_attachment_view *view = framebuffer->color_attachments[i]; + table[i] = view->surface_state.offset; + + /* Don't write the reloc back to the surface state. We do that at + * submit time. Surface address is dwords 8-9. */ + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + view->surface_state.offset + 8 * sizeof(int32_t), + &view->image->mem->bo, view->image->offset); + } + } + + if (layout) { + for (uint32_t i = 0; i < layout->stage[s].count; i++) { + struct anv_pipeline_layout_entry *e = &layout->stage[s].entries[i]; + struct anv_image_view *image_view; + struct anv_buffer_view *buffer_view; + void *d = cmd_buffer->descriptor_sets[e->set]->descriptors[e->index]; + + switch (e->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + image_view = d; + table[bias + i] = image_view->surface_state.offset; + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + image_view->surface_state.offset + 8 * sizeof(int32_t), + &image_view->image->mem->bo, + image_view->image->offset); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + /* FIXME: What are these? TBOs? */ + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + buffer_view = d; + table[bias + i] = buffer_view->surface_state.offset; + anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, + buffer_view->surface_state.offset + 8 * sizeof(int32_t), + &buffer_view->buffer->mem->bo, + buffer_view->buffer->offset + buffer_view->offset); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + break; + default: + break; + } + } + } + + /* FIXME: Samplers */ + + /* The binding table pointer commands all have the same structure, only + * the opcode differs. + */ + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = opcodes[s], + .PointertoVSBindingTable = table_state.offset); + } +} + +static void +anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->pipeline; + const uint32_t num_buffers = __builtin_popcount(cmd_buffer->vb_dirty); + const uint32_t num_dwords = 1 + num_buffers * 4; + uint32_t *p; + + if (cmd_buffer->vb_dirty) { + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, cmd_buffer->vb_dirty) { + struct anv_buffer *buffer = cmd_buffer->vb[vb].buffer; + uint32_t offset = cmd_buffer->vb[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = 0, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { &buffer->mem->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if (cmd_buffer->dirty & ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { + /* maybe: anv_batch_merge(batch, GEN8_3DSTATE_SF, a, b) */ + uint32_t *dw; + + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_3DSTATE_SF_length); + for (uint32_t i = 0; i < GEN8_3DSTATE_SF_length; i++) + dw[i] = cmd_buffer->rs_state->state_sf[i] | pipeline->state_sf[i]; + } + + cmd_buffer->vb_dirty = 0; + cmd_buffer->dirty = 0; +} + +void VKAPI vkCmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void VKAPI vkCmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void VKAPI vkCmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + struct anv_bo *bo = &buffer->mem->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void VKAPI vkCmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + struct anv_bo *bo = &buffer->mem->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void VKAPI vkCmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ +} + +void VKAPI vkCmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer buffer, + VkDeviceSize offset) +{ +} + +void VKAPI vkCmdSetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipeEvent pipeEvent) +{ +} + +void VKAPI vkCmdResetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipeEvent pipeEvent) +{ +} + +void VKAPI vkCmdWaitEvents( + VkCmdBuffer cmdBuffer, + VkWaitEvent waitEvent, + uint32_t eventCount, + const VkEvent* pEvents, + uint32_t memBarrierCount, + const void** ppMemBarriers) +{ +} + +void VKAPI vkCmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkWaitEvent waitEvent, + uint32_t pipeEventCount, + const VkPipeEvent* pPipeEvents, + uint32_t memBarrierCount, + const void** ppMemBarriers) +{ +} + +static void +anv_batch_emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void VKAPI vkCmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + break; + + default: + break; + } +} + +void VKAPI vkCmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16 + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + break; + + default: + break; + } +} + +void VKAPI vkCmdResetQueryPool( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount) +{ +} + +#define TIMESTAMP 0x44070 + +void VKAPI vkCmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + struct anv_bo *bo = &buffer->mem->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +void VKAPI vkCmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ +} + +void VKAPI vkCmdInitAtomicCounters( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t startCounter, + uint32_t counterCount, + const uint32_t* pData) +{ +} + +void VKAPI vkCmdLoadAtomicCounters( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t startCounter, + uint32_t counterCount, + VkBuffer srcBuffer, + VkDeviceSize srcOffset) +{ +} + +void VKAPI vkCmdSaveAtomicCounters( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + uint32_t startCounter, + uint32_t counterCount, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ +} + +VkResult VKAPI vkCreateFramebuffer( + VkDevice _device, + const VkFramebufferCreateInfo* pCreateInfo, + VkFramebuffer* pFramebuffer) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount; + for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) { + framebuffer->color_attachments[i] = + (struct anv_color_attachment_view *) pCreateInfo->pColorAttachments[i].view; + } + + if (pCreateInfo->pDepthStencilAttachment) { + framebuffer->depth_stencil = + (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view; + } + + framebuffer->sample_count = pCreateInfo->sampleCount; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + *pFramebuffer = (VkFramebuffer) framebuffer; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + VkRenderPass* pRenderPass) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_render_pass *pass; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + pass = anv_device_alloc(device, sizeof(*pass), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pass->render_area = pCreateInfo->renderArea; + + *pRenderPass = (VkRenderPass) pass; + + return VK_SUCCESS; +} + +void VKAPI vkCmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBegin* pRenderPassBegin) +{ + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass; + + cmd_buffer->framebuffer = (struct anv_framebuffer *) pRenderPassBegin->framebuffer; + cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = pass->render_area.offset.y, + .ClippedDrawingRectangleXMin = pass->render_area.offset.x, + .ClippedDrawingRectangleYMax = + pass->render_area.offset.y + pass->render_area.extent.height - 1, + .ClippedDrawingRectangleXMax = + pass->render_area.offset.x + pass->render_area.extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); +} + +void VKAPI vkCmdEndRenderPass( + VkCmdBuffer cmdBuffer, + VkRenderPass renderPass) +{ +} diff --git a/src/vulkan/gem.c b/src/vulkan/gem.c new file mode 100644 index 00000000000..5cc5e5d8e84 --- /dev/null +++ b/src/vulkan/gem.c @@ -0,0 +1,283 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. + */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, int gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + VG_CLEAR(gem_mmap); + gem_mmap.handle = gem_handle; + gem_mmap.offset = offset; + gem_mmap.size = size; + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); +} + +int +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +/** + * On error, \a timeout_ns holds the remaining time. + */ +int +anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + if (ret == -1) + return -errno; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + int gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = I915_TILING_X; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(struct anv_device *device, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +int +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return args.handle; +} diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h new file mode 100644 index 00000000000..c15afe9b266 --- /dev/null +++ b/src/vulkan/gen8_pack.h @@ -0,0 +1,8702 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for BDW. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#define GEN8_3DSTATE_URB_VS_length 0x00000002 +#define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 31) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_VS_length 0x00000009 +#define GEN8_3DSTATE_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t AccessesUAV; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t SIMD8DispatchEnable; + uint32_t VertexCacheDisable; + uint32_t FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 2, 2) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +struct GEN8_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddressHigh; +}; + +static inline void +GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddressHigh, dw1); + +} + +#define GEN8_MI_ATOMIC_length 0x00000003 +#define GEN8_MI_ATOMIC_length_bias 0x00000002 +#define GEN8_MI_ATOMIC_header \ + .CommandType = 0, \ + .MICommandOpcode = 47 + +struct GEN8_MI_ATOMIC { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; + uint32_t PostSyncOperation; +#define DWORD 0 +#define QWORD 1 +#define OCTWORD 2 +#define RESERVED 3 + uint32_t DataSize; + uint32_t InlineData; + uint32_t CSSTALL; + uint32_t ReturnDataControl; + uint32_t ATOMICOPCODE; + uint32_t DwordLength; + __gen_address_type MemoryAddress; + uint32_t MemoryAddressHigh; + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; +}; + +static inline void +GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ATOMIC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->DataSize, 19, 20) | + __gen_field(values->InlineData, 18, 18) | + __gen_field(values->CSSTALL, 17, 17) | + __gen_field(values->ReturnDataControl, 16, 16) | + __gen_field(values->ATOMICOPCODE, 8, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); + + dw[2] = + __gen_field(values->MemoryAddressHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->Operand1DataDword0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Operand2DataDword0, 0, 31) | + 0; + + dw[5] = + __gen_field(values->Operand1DataDword1, 0, 31) | + 0; + + dw[6] = + __gen_field(values->Operand2DataDword1, 0, 31) | + 0; + + dw[7] = + __gen_field(values->Operand1DataDword2, 0, 31) | + 0; + + dw[8] = + __gen_field(values->Operand2DataDword2, 0, 31) | + 0; + + dw[9] = + __gen_field(values->Operand1DataDword3, 0, 31) | + 0; + + dw[10] = + __gen_field(values->Operand2DataDword3, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 +#define GEN8_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +struct GEN8_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 +#define GEN8_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_SIGNAL_header \ + .CommandType = 0, \ + .MICommandOpcode = 27, \ + .DwordLength = 0 + +struct GEN8_MI_SEMAPHORE_SIGNAL { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t PostSyncOperation; +#define RCS 0 +#define VCS0 1 +#define BCS 2 +#define VECS 3 +#define VCS1 4 + uint32_t TargetEngineSelect; + uint32_t DwordLength; + uint32_t TargetContextID; +}; + +static inline void +GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->TargetEngineSelect, 15, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->TargetContextID, 0, 31) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_WAIT_length 0x00000004 +#define GEN8_MI_SEMAPHORE_WAIT_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_WAIT_header \ + .CommandType = 0, \ + .MICommandOpcode = 28, \ + .DwordLength = 2 + +struct GEN8_MI_SEMAPHORE_WAIT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; +#define PollingMode 1 +#define SignalMode 0 + uint32_t WaitMode; +#define SAD_GREATER_THAN_SDD 0 +#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 +#define SAD_LESS_THAN_SDD 2 +#define SAD_LESS_THAN_OR_EQUAL_SDD 3 +#define SAD_EQUAL_SDD 4 +#define SAD_NOT_EQUAL_SDD 5 + uint32_t CompareOperation; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; + __gen_address_type SemaphoreAddress; +}; + +static inline void +GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->WaitMode, 15, 15) | + __gen_field(values->CompareOperation, 12, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + +} + +#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 +#define GEN8_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 2 + +struct GEN8_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_PIPELINE_SELECT_length 0x00000001 +#define GEN8_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN8_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +struct GEN8_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 +#define GEN8_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 14 + +struct GEN8_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + uint32_t GeneralStateMemoryObjectControlState; + uint32_t GeneralStateBaseAddressModifyEnable; + uint32_t StatelessDataPortAccessMemoryObjectControlState; + __gen_address_type SurfaceStateBaseAddress; + uint32_t SurfaceStateMemoryObjectControlState; + uint32_t SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + uint32_t DynamicStateMemoryObjectControlState; + uint32_t DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + uint32_t IndirectObjectMemoryObjectControlState; + uint32_t IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + uint32_t InstructionMemoryObjectControlState; + uint32_t InstructionBaseAddressModifyEnable; + uint32_t GeneralStateBufferSize; + uint32_t GeneralStateBufferSizeModifyEnable; + uint32_t DynamicStateBufferSize; + uint32_t DynamicStateBufferSizeModifyEnable; + uint32_t IndirectObjectBufferSize; + uint32_t IndirectObjectBufferSizeModifyEnable; + uint32_t InstructionBufferSize; + uint32_t InstructionBuffersizeModifyEnable; +}; + +static inline void +GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + /* Struct GeneralStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + dw[3] = + /* Struct StatelessDataPortAccessMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + uint32_t dw4 = + /* Struct SurfaceStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + + uint32_t dw6 = + /* Struct DynamicStateMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + + uint32_t dw8 = + /* Struct IndirectObjectMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + + uint32_t dw10 = + /* Struct InstructionMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[10] = + __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + + dw[12] = + __gen_field(values->GeneralStateBufferSize, 12, 31) | + __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[13] = + __gen_field(values->DynamicStateBufferSize, 12, 31) | + __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[14] = + __gen_field(values->IndirectObjectBufferSize, 12, 31) | + __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | + 0; + + dw[15] = + __gen_field(values->InstructionBufferSize, 12, 31) | + __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | + 0; + +} + +#define GEN8_STATE_PREFETCH_length 0x00000002 +#define GEN8_STATE_PREFETCH_length_bias 0x00000002 +#define GEN8_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +struct GEN8_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN8_STATE_SIP_length 0x00000003 +#define GEN8_STATE_SIP_length_bias 0x00000002 +#define GEN8_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 1 + +struct GEN8_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void +GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 63) | + 0; + +} + +#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 +#define GEN8_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +struct GEN8_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + uint32_t SWTessellationMemoryObjectControlState; + __gen_address_type SWTessellationBaseAddressHigh; +}; + +static inline void +GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + /* Struct SWTessellationMemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SWTessellationBaseAddressHigh, dw2); + +} + +#define GEN8_3DPRIMITIVE_length 0x00000007 +#define GEN8_3DPRIMITIVE_length_bias 0x00000002 +#define GEN8_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +struct GEN8_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AAPointCoverageBias; + uint32_t AACoverageBias; + uint32_t AAPointCoverageSlope; + uint32_t AACoverageSlope; + uint32_t AAPointCoverageEndCapBias; + uint32_t AACoverageEndCapBias; + uint32_t AAPointCoverageEndCapSlope; + uint32_t AACoverageEndCapSlope; +}; + +static inline void +GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AAPointCoverageBias, 24, 31) | + __gen_field(values->AACoverageBias, 16, 23) | + __gen_field(values->AAPointCoverageSlope, 8, 15) | + __gen_field(values->AACoverageSlope, 0, 7) | + 0; + + dw[2] = + __gen_field(values->AAPointCoverageEndCapBias, 24, 31) | + __gen_field(values->AACoverageEndCapBias, 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope, 8, 15) | + __gen_field(values->AACoverageEndCapSlope, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + uint32_t SurfaceObjectControlState; +#define NoValidData 0 + uint32_t BindingTablePoolBufferSize; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + /* Struct SurfaceObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + dw[3] = + __gen_field(values->BindingTablePoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; + uint32_t BlendStatePointerValid; +}; + +static inline void +GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_field(values->BlendStatePointerValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 +#define GEN8_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; + uint32_t ColorCalcStatePointerValid; +}; + +static inline void +GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_field(values->ColorCalcStatePointerValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 +#define GEN8_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN8_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 +#define GEN8_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN8_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float DepthClearValue; + uint32_t DepthClearValueValid; +}; + +static inline void +GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_float(values->DepthClearValue) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_CLIP_length 0x00000004 +#define GEN8_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define Normal 0 +#define Force 1 + uint32_t ForceUserClipDistanceCullTestEnableBitmask; +#define _8Bit 0 +#define _4Bit 1 + uint32_t VertexSubPixelPrecisionSelect; + uint32_t EarlyCullEnable; +#define Normal 0 +#define Force 1 + uint32_t ForceUserClipDistanceClipTestEnableBitmask; +#define Normal 0 +#define Force 1 + uint32_t ForceClipMode; + uint32_t ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + uint32_t ClipEnable; +#define API_OGL 0 + uint32_t APIMode; + uint32_t ViewportXYClipTestEnable; + uint32_t GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define NORMAL 0 +#define REJECT_ALL 3 +#define ACCEPT_ALL 4 + uint32_t ClipMode; + uint32_t PerspectiveDivideDisable; + uint32_t NonPerspectiveBarycentricEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; + uint32_t MinimumPointWidth; + uint32_t MaximumPointWidth; + uint32_t ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | + __gen_field(values->ForceClipMode, 16, 16) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth, 17, 27) | + __gen_field(values->MaximumPointWidth, 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b +#define GEN8_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t ConstantBufferObjectControlState; + uint32_t DwordLength; + uint32_t ConstantBody; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + /* Struct ConstantBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct ConstantBody: found 3DSTATE_CONSTANT(Body) */ + 0; + +} + +#define GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 +#define GEN8_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 6 + +struct GEN8_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t DepthWriteEnable; + uint32_t StencilWriteEnable; + uint32_t HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; + uint32_t Depth; + uint32_t MinimumArrayElement; + uint32_t DepthBufferObjectControlState; + uint32_t RenderTargetViewExtent; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + dw[5] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + /* Struct DepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_DS_length 0x00000009 +#define GEN8_3DSTATE_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t AccessesUAV; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + uint32_t StatisticsEnable; + uint32_t SIMD8DispatchEnable; + uint32_t ComputeWCoordinateEnable; + uint32_t CacheDisable; + uint32_t FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 3, 3) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->CacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +struct GEN8_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +struct GEN8_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +struct GEN8_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +struct GEN8_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + uint32_t ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +struct GEN8_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + uint32_t ConstantBufferDx9GenerateStall; + uint32_t ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + uint32_t GatherPoolEnable; + uint32_t MemoryObjectControlState; + uint32_t GatherPoolBufferSize; +}; + +static inline void +GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + dw[3] = + __gen_field(values->GatherPoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_GS_length 0x0000000a +#define GEN8_3DSTATE_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 8 + +struct GEN8_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t AccessesUAV; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ExpectedVertexCount; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + uint32_t IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamId; +#define DispatchModeSingle 0 +#define DispatchModeDualInstance 1 +#define DispatchModeDualObject 2 +#define DispatchModeSIMD8 3 + uint32_t DispatchMode; + uint32_t StatisticsEnable; + uint32_t InvocationsIncrementValue; + uint32_t IncludePrimitiveID; + uint32_t Hint; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + uint32_t DiscardAdjacency; + uint32_t Enable; +#define CUT 0 +#define SID 1 + uint32_t ControlDataFormat; + uint32_t StaticOutput; + uint32_t StaticOutputVertexCount; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->ExpectedVertexCount, 0, 5) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamId, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->InvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->Enable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_field(values->StaticOutput, 30, 30) | + __gen_field(values->StaticOutputVertexCount, 16, 26) | + 0; + + dw[9] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct HierarchicalDepthBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_HS_length 0x00000009 +#define GEN8_3DSTATE_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t Enable; + uint32_t StatisticsEnable; + uint32_t MaximumNumberofThreads; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + uint32_t AccessesUAV; + uint32_t IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; +}; + +static inline void +GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->MaximumNumberofThreads, 8, 16) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[5] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[7] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->AccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[8] = + 0; + +} + +#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 +#define GEN8_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t MemoryObjectControlState; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->IndexFormat, 8, 9) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + + dw[4] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 +#define GEN8_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + uint32_t LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount, 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN8_3DSTATE_MONOFILTER_SIZE_length 0x00000002 +#define GEN8_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN8_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 +#define GEN8_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelPositionOffsetEnable; +#define CENTER 0 +#define UL_CORNER 1 + uint32_t PixelLocation; + uint32_t NumberofMultisamples; +}; + +static inline void +GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelPositionOffsetEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PatternRow, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_PS_length 0x0000000c +#define GEN8_3DSTATE_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 10 + +struct GEN8_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; +#define FlushedtoZero 0 +#define Retained 1 + uint32_t SinglePrecisionDenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreadsPerPSD; + uint32_t PushConstantEnable; + uint32_t RenderTargetFastClearEnable; + uint32_t RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + uint32_t _32PixelDispatchEnable; + uint32_t _16PixelDispatchEnable; + uint32_t _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterForConstantSetupData0; + uint32_t DispatchGRFStartRegisterForConstantSetupData1; + uint32_t DispatchGRFStartRegisterForConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 63) | + 0; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[6] = + __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | + 0; + + dw[8] = + __gen_offset(values->KernelStartPointer1, 6, 63) | + 0; + + dw[10] = + __gen_offset(values->KernelStartPointer2, 6, 63) | + 0; + +} + +#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 +#define GEN8_3DSTATE_PS_BLEND_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_BLEND_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 77, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PS_BLEND { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AlphaToCoverageEnable; + uint32_t HasWriteableRT; + uint32_t ColorBufferBlendEnable; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t AlphaTestEnable; + uint32_t IndependentAlphaBlendEnable; +}; + +static inline void +GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_BLEND * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->HasWriteableRT, 30, 30) | + __gen_field(values->ColorBufferBlendEnable, 29, 29) | + __gen_field(values->SourceAlphaBlendFactor, 24, 28) | + __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | + __gen_field(values->SourceBlendFactor, 14, 18) | + __gen_field(values->DestinationBlendFactor, 9, 13) | + __gen_field(values->AlphaTestEnable, 8, 8) | + __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | + 0; + +} + +#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 +#define GEN8_3DSTATE_PS_EXTRA_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_EXTRA_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 79, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PS_EXTRA { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelShaderValid; + uint32_t PixelShaderDoesnotwritetoRT; + uint32_t oMaskPresenttoRenderTarget; + uint32_t PixelShaderKillsPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; + uint32_t ForceComputedDepth; + uint32_t PixelShaderUsesSourceDepth; + uint32_t PixelShaderUsesSourceW; + uint32_t Removed; + uint32_t AttributeEnable; + uint32_t PixelShaderDisablesAlphaToCoverage; + uint32_t PixelShaderIsPerSample; + uint32_t PixelShaderHasUAV; + uint32_t PixelShaderUsesInputCoverageMask; +}; + +static inline void +GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_EXTRA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelShaderValid, 31, 31) | + __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | + __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | + __gen_field(values->PixelShaderKillsPixel, 28, 28) | + __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | + __gen_field(values->ForceComputedDepth, 25, 25) | + __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | + __gen_field(values->PixelShaderUsesSourceW, 23, 23) | + __gen_field(values->Removed, 17, 17) | + __gen_field(values->AttributeEnable, 8, 8) | + __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | + __gen_field(values->PixelShaderIsPerSample, 6, 6) | + __gen_field(values->PixelShaderHasUAV, 2, 2) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 1, 1) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_RASTER_length 0x00000005 +#define GEN8_3DSTATE_RASTER_length_bias 0x00000002 +#define GEN8_3DSTATE_RASTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 80, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_RASTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DX9OGL 0 +#define DX100 1 +#define DX101 2 + uint32_t APIMode; +#define Clockwise 0 +#define CounterClockwise 1 + uint32_t FrontWinding; +#define NUMRASTSAMPLES_0 0 +#define NUMRASTSAMPLES_1 1 +#define NUMRASTSAMPLES_2 2 +#define NUMRASTSAMPLES_4 3 +#define NUMRASTSAMPLES_8 4 +#define NUMRASTSAMPLES_16 5 + uint32_t ForcedSampleCount; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; +#define Normal 0 +#define Force 1 + uint32_t ForceMultisampling; + uint32_t SmoothPointEnable; + uint32_t DXMultisampleRasterizationEnable; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t DXMultisampleRasterizationMode; + uint32_t GlobalDepthOffsetEnableSolid; + uint32_t GlobalDepthOffsetEnableWireframe; + uint32_t GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + uint32_t AntialiasingEnable; + uint32_t ScissorRectangleEnable; + uint32_t ViewportZClipTestEnable; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_RASTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->APIMode, 22, 23) | + __gen_field(values->FrontWinding, 21, 21) | + __gen_field(values->ForcedSampleCount, 18, 20) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ForceMultisampling, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | + __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->AntialiasingEnable, 2, 2) | + __gen_field(values->ScissorRectangleEnable, 1, 1) | + __gen_field(values->ViewportZClipTestEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[3] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[4] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 +#define GEN8_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 +#define GEN8_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_PATTERN_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 7 + +struct GEN8_3DSTATE_SAMPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t _8xSample7XOffset; + uint32_t _8xSample7YOffset; + uint32_t _8xSample6XOffset; + uint32_t _8xSample6YOffset; + uint32_t _8xSample5XOffset; + uint32_t _8xSample5YOffset; + uint32_t _8xSample4XOffset; + uint32_t _8xSample4YOffset; + uint32_t _8xSample3XOffset; + uint32_t _8xSample3YOffset; + uint32_t _8xSample2XOffset; + uint32_t _8xSample2YOffset; + uint32_t _8xSample1XOffset; + uint32_t _8xSample1YOffset; + uint32_t _8xSample0XOffset; + uint32_t _8xSample0YOffset; + uint32_t _4xSample3XOffset; + uint32_t _4xSample3YOffset; + uint32_t _4xSample2XOffset; + uint32_t _4xSample2YOffset; + uint32_t _4xSample1XOffset; + uint32_t _4xSample1YOffset; + uint32_t _4xSample0XOffset; + uint32_t _4xSample0YOffset; + uint32_t _1xSample0XOffset; + uint32_t _1xSample0YOffset; + uint32_t _2xSample1XOffset; + uint32_t _2xSample1YOffset; + uint32_t _2xSample0XOffset; + uint32_t _2xSample0YOffset; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 0; + + dw[5] = + __gen_field(values->_8xSample7XOffset, 28, 31) | + __gen_field(values->_8xSample7YOffset, 24, 27) | + __gen_field(values->_8xSample6XOffset, 20, 23) | + __gen_field(values->_8xSample6YOffset, 16, 19) | + __gen_field(values->_8xSample5XOffset, 12, 15) | + __gen_field(values->_8xSample5YOffset, 8, 11) | + __gen_field(values->_8xSample4XOffset, 4, 7) | + __gen_field(values->_8xSample4YOffset, 0, 3) | + 0; + + dw[6] = + __gen_field(values->_8xSample3XOffset, 28, 31) | + __gen_field(values->_8xSample3YOffset, 24, 27) | + __gen_field(values->_8xSample2XOffset, 20, 23) | + __gen_field(values->_8xSample2YOffset, 16, 19) | + __gen_field(values->_8xSample1XOffset, 12, 15) | + __gen_field(values->_8xSample1YOffset, 8, 11) | + __gen_field(values->_8xSample0XOffset, 4, 7) | + __gen_field(values->_8xSample0YOffset, 0, 3) | + 0; + + dw[7] = + __gen_field(values->_4xSample3XOffset, 28, 31) | + __gen_field(values->_4xSample3YOffset, 24, 27) | + __gen_field(values->_4xSample2XOffset, 20, 23) | + __gen_field(values->_4xSample2YOffset, 16, 19) | + __gen_field(values->_4xSample1XOffset, 12, 15) | + __gen_field(values->_4xSample1YOffset, 8, 11) | + __gen_field(values->_4xSample0XOffset, 4, 7) | + __gen_field(values->_4xSample0YOffset, 0, 3) | + 0; + + dw[8] = + __gen_field(values->_1xSample0XOffset, 20, 23) | + __gen_field(values->_1xSample0YOffset, 16, 19) | + __gen_field(values->_2xSample1XOffset, 12, 15) | + __gen_field(values->_2xSample1YOffset, 8, 11) | + __gen_field(values->_2xSample0XOffset, 4, 7) | + __gen_field(values->_2xSample0YOffset, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_SBE_length 0x00000004 +#define GEN8_3DSTATE_SBE_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ForceVertexURBEntryReadLength; + uint32_t ForceVertexURBEntryReadOffset; + uint32_t NumberofSFOutputAttributes; + uint32_t AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t PrimitiveIDOverrideComponentW; + uint32_t PrimitiveIDOverrideComponentZ; + uint32_t PrimitiveIDOverrideComponentY; + uint32_t PrimitiveIDOverrideComponentX; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t PrimitiveIDOverrideAttributeSelect; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable; +}; + +static inline void +GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | + __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | + __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | + __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | + __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 5, 10) | + __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | + 0; + + dw[2] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ConstantInterpolationEnable, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b +#define GEN8_3DSTATE_SBE_SWIZ_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_SWIZ_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 81, \ + .DwordLength = 9 + +struct GEN8_3DSTATE_SBE_SWIZ { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t Attribute; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute09WrapShortestEnables; + uint32_t Attribute08WrapShortestEnables; + uint32_t Attribute07WrapShortestEnables; + uint32_t Attribute06WrapShortestEnables; + uint32_t Attribute05WrapShortestEnables; + uint32_t Attribute04WrapShortestEnables; + uint32_t Attribute03WrapShortestEnables; + uint32_t Attribute02WrapShortestEnables; + uint32_t Attribute01WrapShortestEnables; + uint32_t Attribute00WrapShortestEnables; +}; + +static inline void +GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + /* Struct Attribute: found SF_OUTPUT_ATTRIBUTE_DETAIL */ + 0; + + dw[9] = + __gen_field(values->Attribute15WrapShortestEnables, 60, 63) | + __gen_field(values->Attribute14WrapShortestEnables, 56, 59) | + __gen_field(values->Attribute13WrapShortestEnables, 52, 55) | + __gen_field(values->Attribute12WrapShortestEnables, 48, 51) | + __gen_field(values->Attribute11WrapShortestEnables, 44, 47) | + __gen_field(values->Attribute10WrapShortestEnables, 40, 43) | + __gen_field(values->Attribute09WrapShortestEnables, 36, 39) | + __gen_field(values->Attribute08WrapShortestEnables, 32, 35) | + __gen_field(values->Attribute07WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute06WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute05WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute04WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute03WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute02WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute01WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute00WrapShortestEnables, 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SF_length 0x00000004 +#define GEN8_3DSTATE_SF_length_bias 0x00000002 +#define GEN8_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t LegacyGlobalDepthBiasEnable; + uint32_t StatisticsEnable; + uint32_t ViewportTransformEnable; + uint32_t LineWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LastPixelEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t SmoothPointEnable; + uint32_t VertexSubPixelPrecisionSelect; +#define Vertex 0 +#define State 1 + uint32_t PointWidthSource; + uint32_t PointWidth; +}; + +static inline void +GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ViewportTransformEnable, 1, 1) | + 0; + + dw[2] = + __gen_field(values->LineWidth, 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->PointWidthSource, 11, 11) | + __gen_field(values->PointWidth, 0, 10) | + 0; + +} + +#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 +#define GEN8_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 6 + +struct GEN8_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferEnable; + uint32_t SOBufferIndex; + uint32_t SOBufferObjectControlState; + uint32_t StreamOffsetWriteEnable; + uint32_t StreamOutputBufferOffsetAddressEnable; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceSize; + __gen_address_type StreamOutputBufferOffsetAddress; + uint32_t StreamOffset; +}; + +static inline void +GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOBufferEnable, 31, 31) | + __gen_field(values->SOBufferIndex, 29, 30) | + /* Struct SOBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->StreamOffsetWriteEnable, 21, 21) | + __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->SurfaceSize, 0, 29) | + 0; + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, dw5); + + dw[7] = + __gen_field(values->StreamOffset, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +struct GEN8_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 +#define GEN8_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + uint32_t StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + /* Struct StencilBufferObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_STREAMOUT_length 0x00000005 +#define GEN8_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN8_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t APIRenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + uint32_t SOStatisticsEnable; +#define Normal 0 +#define Resreved 1 +#define Force_Off 2 +#define Force_on 3 + uint32_t ForceRendering; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; + uint32_t Buffer1SurfacePitch; + uint32_t Buffer0SurfacePitch; + uint32_t Buffer3SurfacePitch; + uint32_t Buffer2SurfacePitch; +}; + +static inline void +GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->APIRenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->ForceRendering, 23, 24) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + + dw[3] = + __gen_field(values->Buffer1SurfacePitch, 16, 27) | + __gen_field(values->Buffer0SurfacePitch, 0, 11) | + 0; + + dw[4] = + __gen_field(values->Buffer3SurfacePitch, 16, 27) | + __gen_field(values->Buffer2SurfacePitch, 0, 11) | + 0; + +} + +#define GEN8_3DSTATE_TE_length 0x00000004 +#define GEN8_3DSTATE_TE_length_bias 0x00000002 +#define GEN8_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +struct GEN8_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define LINE 1 +#define TRI_CW 2 +#define TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + uint32_t TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + +static inline void +GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = + __gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + +#define GEN8_3DSTATE_URB_DS_length 0x00000002 +#define GEN8_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 31) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_URB_GS_length 0x00000002 +#define GEN8_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 31) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_URB_HS_length 0x00000002 +#define GEN8_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 31) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN8_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +struct GEN8_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN8_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +struct GEN8_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_VF_length 0x00000002 +#define GEN8_3DSTATE_VF_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + +static inline void +GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003 +#define GEN8_3DSTATE_VF_INSTANCING_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_INSTANCING_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 73, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_VF_INSTANCING { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t InstancingEnable; + uint32_t VertexElementIndex; + uint32_t InstanceDataStepRate; +}; + +static inline void +GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_INSTANCING * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstancingEnable, 8, 8) | + __gen_field(values->VertexElementIndex, 0, 5) | + 0; + + dw[2] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_VF_SGVS_length 0x00000002 +#define GEN8_3DSTATE_VF_SGVS_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_SGVS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 74, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VF_SGVS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t InstanceIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t InstanceIDComponentNumber; + uint32_t InstanceIDElementOffset; + uint32_t VertexIDEnable; +#define COMP_0 0 +#define COMP_1 1 +#define COMP_2 2 +#define COMP_3 3 + uint32_t VertexIDComponentNumber; + uint32_t VertexIDElementOffset; +}; + +static inline void +GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_SGVS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InstanceIDEnable, 31, 31) | + __gen_field(values->InstanceIDComponentNumber, 29, 30) | + __gen_field(values->InstanceIDElementOffset, 16, 21) | + __gen_field(values->VertexIDEnable, 15, 15) | + __gen_field(values->VertexIDComponentNumber, 13, 14) | + __gen_field(values->VertexIDElementOffset, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001 +#define GEN8_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN8_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +struct GEN8_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t StatisticsEnable; +}; + +static inline void +GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002 +#define GEN8_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002 +#define GEN8_3DSTATE_VF_TOPOLOGY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 75, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VF_TOPOLOGY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PrimitiveTopologyType; +}; + +static inline void +GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VF_TOPOLOGY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN8_3DSTATE_WM_length 0x00000002 +#define GEN8_3DSTATE_WM_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StatisticsEnable; + uint32_t LegacyDepthBufferClearEnable; + uint32_t LegacyDepthBufferResolveEnable; + uint32_t LegacyHierarchicalDepthBufferResolveEnable; + uint32_t LegacyDiamondLineRasterization; +#define NORMAL 0 +#define PSEXEC 1 +#define PREPS 2 + uint32_t EarlyDepthStencilControl; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceThreadDispatchEnable; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineAntialiasingRegionWidth; + uint32_t PolygonStippleEnable; + uint32_t LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define Normal 0 +#define ForceOff 1 +#define ForceON 2 + uint32_t ForceKillPixelEnable; +}; + +static inline void +GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) | + __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) | + __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->ForceThreadDispatchEnable, 19, 20) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->ForceKillPixelEnable, 0, 1) | + 0; + +} + +#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002 +#define GEN8_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_CHROMAKEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 76, \ + .DwordLength = 0 + +struct GEN8_3DSTATE_WM_CHROMAKEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyKillEnable; +}; + +static inline void +GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyKillEnable, 31, 31) | + 0; + +} + +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003 +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_DEPTH_STENCIL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 78, \ + .DwordLength = 1 + +struct GEN8_3DSTATE_WM_DEPTH_STENCIL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + uint32_t BackfaceStencilTestFunction; + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestFunction; + uint32_t DepthTestFunction; +#define False 0 +#define True 1 + uint32_t DoubleSidedStencilEnable; + uint32_t StencilTestEnable; + uint32_t StencilBufferWriteEnable; + uint32_t DepthTestEnable; + uint32_t DepthBufferWriteEnable; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; +}; + +static inline void +GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilFailOp, 29, 31) | + __gen_field(values->StencilPassDepthFailOp, 26, 28) | + __gen_field(values->StencilPassDepthPassOp, 23, 25) | + __gen_field(values->BackfaceStencilTestFunction, 20, 22) | + __gen_field(values->BackfaceStencilFailOp, 17, 19) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) | + __gen_field(values->StencilTestFunction, 8, 10) | + __gen_field(values->DepthTestFunction, 5, 7) | + __gen_field(values->DoubleSidedStencilEnable, 4, 4) | + __gen_field(values->StencilTestEnable, 3, 3) | + __gen_field(values->StencilBufferWriteEnable, 2, 2) | + __gen_field(values->DepthTestEnable, 1, 1) | + __gen_field(values->DepthBufferWriteEnable, 0, 0) | + 0; + + dw[2] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005 +#define GEN8_3DSTATE_WM_HZ_OP_length_bias 0x00000002 +#define GEN8_3DSTATE_WM_HZ_OP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 82, \ + .DwordLength = 3 + +struct GEN8_3DSTATE_WM_HZ_OP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferClearEnable; + uint32_t DepthBufferClearEnable; + uint32_t ScissorRectangleEnable; + uint32_t DepthBufferResolveEnable; + uint32_t HierarchicalDepthBufferResolveEnable; + uint32_t PixelPositionOffsetEnable; + uint32_t FullSurfaceDepthClear; + uint32_t StencilClearValue; + uint32_t NumberofMultisamples; + uint32_t ClearRectangleYMin; + uint32_t ClearRectangleXMin; + uint32_t ClearRectangleYMax; + uint32_t ClearRectangleXMax; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_WM_HZ_OP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StencilBufferClearEnable, 31, 31) | + __gen_field(values->DepthBufferClearEnable, 30, 30) | + __gen_field(values->ScissorRectangleEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->PixelPositionOffsetEnable, 26, 26) | + __gen_field(values->FullSurfaceDepthClear, 25, 25) | + __gen_field(values->StencilClearValue, 16, 23) | + __gen_field(values->NumberofMultisamples, 13, 15) | + 0; + + dw[2] = + __gen_field(values->ClearRectangleYMin, 16, 31) | + __gen_field(values->ClearRectangleXMin, 0, 15) | + 0; + + dw[3] = + __gen_field(values->ClearRectangleYMax, 16, 31) | + __gen_field(values->ClearRectangleXMax, 0, 15) | + 0; + + dw[4] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_GPGPU_WALKER_length 0x0000000f +#define GEN8_GPGPU_WALKER_length_bias 0x00000002 +#define GEN8_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 5, \ + .DwordLength = 13 + +struct GEN8_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t IndirectParameterEnable; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingResumeZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 6, 31) | + 0; + + dw[4] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[6] = + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[9] = + 0; + + dw[10] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) | + 0; + + dw[12] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[13] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[14] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004 +#define GEN8_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN8_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +struct GEN8_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +struct GEN8_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_GRPID_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_GRPID_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 6 + +struct GEN8_MEDIA_OBJECT_GRPID { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t EndofThreadGroup; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + uint32_t GroupID; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->EndofThreadGroup, 23, 23) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->GroupID, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 +#define GEN8_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +struct GEN8_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; + uint32_t PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData; +}; + +static inline void +GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->InlineData, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +struct GEN8_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + uint32_t ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t GroupIDLoopSelect; + uint32_t ScoreboardMask; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->GroupIDLoopSelect, 8, 31) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 +#define GEN8_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN8_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +struct GEN8_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN8_MEDIA_VFE_STATE_length 0x00000009 +#define GEN8_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN8_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 7 + +struct GEN8_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t ScratchSpaceBasePointerHigh; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t SliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + 0; + + dw[4] = + __gen_field(values->SliceDisable, 0, 1) | + 0; + + dw[5] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[6] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[7] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[8] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN8_MI_ARB_CHECK_length 0x00000001 +#define GEN8_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN8_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +struct GEN8_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 +#define GEN8_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN8_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +struct GEN8_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 +#define GEN8_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN8_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 1 + +struct GEN8_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + uint32_t AddOffsetEnable; + uint32_t PredicationEnable; + uint32_t ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; + __gen_address_type BatchBufferStartAddressHigh; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BatchBufferStartAddressHigh, dw2); + +} + +#define GEN8_MI_CLFLUSH_length_bias 0x00000002 +#define GEN8_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +struct GEN8_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 1 + +struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; + __gen_address_type CompareAddressHigh; +}; + +static inline void +GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->CompareAddressHigh, dw3); + +} + +#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 +#define GEN8_MI_COPY_MEM_MEM_length_bias 0x00000002 +#define GEN8_MI_COPY_MEM_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 46, \ + .DwordLength = 3 + +struct GEN8_MI_COPY_MEM_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTSource; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTDestination; + uint32_t DwordLength; + __gen_address_type DestinationMemoryAddress; + __gen_address_type SourceMemoryAddress; +}; + +static inline void +GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_COPY_MEM_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTTSource, 22, 22) | + __gen_field(values->UseGlobalGTTDestination, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + +} + +#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 +#define GEN8_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +struct GEN8_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 +#define GEN8_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 2 + +struct GEN8_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +struct GEN8_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +struct GEN8_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; +#define NeverForward 0 +#define AlwaysForward 1 +#define ConditionallyForward 2 + uint32_t ScanLineEventDoneForward; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->ScanLineEventDoneForward, 17, 18) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 +#define GEN8_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 2 + +struct GEN8_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_MI_MATH_length_bias 0x00000002 +#define GEN8_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +struct GEN8_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MI_NOOP_length 0x00000001 +#define GEN8_MI_NOOP_length_bias 0x00000001 +#define GEN8_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +struct GEN8_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN8_MI_PREDICATE_length 0x00000001 +#define GEN8_MI_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +struct GEN8_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define KEEP 0 +#define LOAD 2 +#define LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN8_MI_REPORT_HEAD_length 0x00000001 +#define GEN8_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN8_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +struct GEN8_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_RS_CONTEXT_length 0x00000001 +#define GEN8_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN8_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +struct GEN8_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Restore 0 +#define Save 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_CONTROL_length 0x00000001 +#define GEN8_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN8_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +struct GEN8_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define Stop 0 +#define Start 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 +#define GEN8_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +struct GEN8_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN8_MI_SET_CONTEXT_length 0x00000002 +#define GEN8_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN8_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +struct GEN8_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + uint32_t CoreModeEnable; + uint32_t ResourceStreamerStateSaveEnable; + uint32_t ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN8_MI_SET_PREDICATE_length 0x00000001 +#define GEN8_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1 + +struct GEN8_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define NOOPNever 0 +#define NOOPonResult2clear 1 +#define NOOPonResult2set 2 +#define NOOPonResultclear 3 +#define NOOPonResultset 4 +#define Executewhenonesliceenabled 5 +#define Executewhentwoslicesareenabled 6 +#define Executewhenthreeslicesareenabled 7 +#define NOOPAlways 15 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 3) | + 0; + +} + +#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 +#define GEN8_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +struct GEN8_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t StoreQword; + uint32_t DwordLength; + __gen_address_type Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->StoreQword, 21, 21) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->Address, dw1); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 +#define GEN8_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +struct GEN8_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UsePerProcessHardwareStatusPage; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_URB_MEM_length 0x00000004 +#define GEN8_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 2 + +struct GEN8_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 +#define GEN8_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN8_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +struct GEN8_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t SuspendFlush; +}; + +static inline void +GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 +#define GEN8_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN8_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +struct GEN8_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN8_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN8_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +struct GEN8_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 +#define GEN8_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN8_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +struct GEN8_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN8_MI_URB_CLEAR_length 0x00000002 +#define GEN8_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN8_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +struct GEN8_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_field(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN8_MI_USER_INTERRUPT_length 0x00000001 +#define GEN8_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN8_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +struct GEN8_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 +#define GEN8_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN8_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +struct GEN8_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DisplayPipeCVerticalBlankWaitEnable; + uint32_t DisplaySpriteCFlipPendingWaitEnable; + uint32_t DisplayPlaneCFlipPendingWaitEnable; + uint32_t DisplayPipeCScanLineWaitEnable; + uint32_t DisplayPipeBVerticalBlankWaitEnable; + uint32_t DisplaySpriteBFlipPendingWaitEnable; + uint32_t DisplayPlaneBFlipPendingWaitEnable; + uint32_t DisplayPipeBScanLineWaitEnable; + uint32_t DisplayPipeAVerticalBlankWaitEnable; + uint32_t DisplaySpriteAFlipPendingWaitEnable; + uint32_t DisplayPlaneAFlipPendingWaitEnable; + uint32_t DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN8_PIPE_CONTROL_length 0x00000006 +#define GEN8_PIPE_CONTROL_length_bias 0x00000002 +#define GEN8_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 4 + +struct GEN8_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + uint32_t GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + uint32_t DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + uint32_t RenderTargetCacheFlushEnable; + uint32_t InstructionCacheInvalidateEnable; + uint32_t TextureCacheInvalidationEnable; + uint32_t IndirectStatePointersDisable; + uint32_t NotifyEnable; + uint32_t PipeControlFlushEnable; + uint32_t DCFlushEnable; + uint32_t VFCacheInvalidationEnable; + uint32_t ConstantCacheInvalidationEnable; + uint32_t StateCacheInvalidationEnable; + uint32_t StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + uint32_t DepthCacheFlushEnable; + __gen_address_type Address; + __gen_address_type AddressHigh; + uint32_t ImmediateData; +}; + +static inline void +GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->AddressHigh, dw3); + + dw[4] = + __gen_field(values->ImmediateData, 0, 63) | + 0; + +} + +struct GEN8_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + uint32_t dw6 = + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + uint32_t dw8 = + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + +} + +struct GEN8_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN8_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN8_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; + uint32_t MemoryObjectControlState; + uint32_t AddressModifyEnable; + uint32_t NullVertexBuffer; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + dw[3] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +struct GEN8_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + uint32_t Valid; + uint32_t SourceElementFormat; + uint32_t EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN8_SO_DECL_ENTRY { + uint32_t Stream3Decl; + uint32_t Stream2Decl; + uint32_t Stream1Decl; + uint32_t Stream0Decl; +}; + +static inline void +GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + /* Struct Stream3Decl: found SO_DECL */ + /* Struct Stream2Decl: found SO_DECL */ + /* Struct Stream1Decl: found SO_DECL */ + /* Struct Stream0Decl: found SO_DECL */ + 0; + +} + +struct GEN8_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { + uint32_t ComponentOverrideW; + uint32_t ComponentOverrideZ; + uint32_t ComponentOverrideY; + uint32_t ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + __gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + +struct GEN8_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +struct GEN8_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; + float XMinViewPort; + float XMaxViewPort; + float YMinViewPort; + float YMaxViewPort; +}; + +static inline void +GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + __gen_float(values->XMinViewPort) | + 0; + + dw[13] = + __gen_float(values->XMaxViewPort) | + 0; + + dw[14] = + __gen_float(values->YMinViewPort) | + 0; + + dw[15] = + __gen_float(values->YMaxViewPort) | + 0; + +} + +struct GEN8_BLEND_STATE { + uint32_t AlphaToCoverageEnable; + uint32_t IndependentAlphaBlendEnable; + uint32_t AlphaToOneEnable; + uint32_t AlphaToCoverageDitherEnable; + uint32_t AlphaTestEnable; + uint32_t AlphaTestFunction; + uint32_t ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t Entry; +}; + +static inline void +GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + dw[1] = + /* Struct Entry: found BLEND_STATE_ENTRY */ + 0; + +} + +struct GEN8_BLEND_STATE_ENTRY { + uint32_t LogicOpEnable; + uint32_t LogicOpFunction; + uint32_t PreBlendSourceOnlyClampEnable; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + uint32_t PreBlendColorClampEnable; + uint32_t PostBlendColorClampEnable; + uint32_t ColorBufferBlendEnable; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t ColorBlendFunction; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t AlphaBlendFunction; + uint32_t WriteDisableAlpha; + uint32_t WriteDisableRed; + uint32_t WriteDisableGreen; + uint32_t WriteDisableBlue; +}; + +static inline void +GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->LogicOpEnable, 63, 63) | + __gen_field(values->LogicOpFunction, 59, 62) | + __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | + __gen_field(values->ColorClampRange, 34, 35) | + __gen_field(values->PreBlendColorClampEnable, 33, 33) | + __gen_field(values->PostBlendColorClampEnable, 32, 32) | + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->SourceBlendFactor, 26, 30) | + __gen_field(values->DestinationBlendFactor, 21, 25) | + __gen_field(values->ColorBlendFunction, 18, 20) | + __gen_field(values->SourceAlphaBlendFactor, 13, 17) | + __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | + __gen_field(values->AlphaBlendFunction, 5, 7) | + __gen_field(values->WriteDisableAlpha, 3, 3) | + __gen_field(values->WriteDisableRed, 2, 2) | + __gen_field(values->WriteDisableGreen, 1, 1) | + __gen_field(values->WriteDisableBlue, 0, 0) | + 0; + +} + +struct GEN8_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN8_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +struct GEN8_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +struct GEN8_MEMORY_OBJECT_CONTROL_STATE { +}; + +static inline void +GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 0; + +} + +struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS { +#define UseCacheabilityControlsfrompagetableUCwithFenceifcoherentcycle 0 +#define UncacheableUCnoncacheable 1 +#define WritethroughWT 2 +#define WritebackWB 3 + uint32_t MemoryTypeLLCeLLCCacheabilityControlLeLLCCC; +#define eLLCOnly 0 +#define LLCOnly 1 +#define LLCeLLCAllowed 2 +#define L3LLCeLLCAllowed 3 + uint32_t TargetCacheTC; + uint32_t EncryptedData; +#define PoorChance 3 +#define NormalChance 2 +#define BetterChance 1 +#define BestChance 0 + uint32_t AgeforQUADLRUAGE; +}; + +static inline void +GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_VEB_DI_IECP_COMMAND_SURFACE_CONTROL_BITS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->MemoryTypeLLCeLLCCacheabilityControlLeLLCCC, 5, 6) | + __gen_field(values->TargetCacheTC, 3, 4) | + __gen_field(values->EncryptedData, 2, 2) | + __gen_field(values->AgeforQUADLRUAGE, 0, 1) | + 0; + +} + +struct GEN8_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; + uint32_t KernelStartPointerHigh; +#define Ftz 0 +#define SetByKernel 1 + uint32_t DenormMode; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + uint32_t IllegalOpcodeExceptionEnable; + uint32_t MaskStackExceptionEnable; + uint32_t SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantIndirectURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + uint32_t BarrierEnable; +#define Encodes0k 0 +#define Encodes4k 1 +#define Encodes8k 2 +#define Encodes16k 4 +#define Encodes32k 8 +#define Encodes64k 16 + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointerHigh, 0, 15) | + 0; + + dw[2] = + __gen_field(values->DenormMode, 19, 19) | + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[4] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[5] = + __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | + 0; + + dw[7] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + +} + +struct GEN8_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN8_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + uint32_t SurfaceArray; + uint32_t SurfaceFormat; +#define VALIGN4 1 +#define VALIGN8 2 +#define VALIGN16 3 + uint32_t SurfaceVerticalAlignment; +#define HALIGN4 1 +#define HALIGN8 2 +#define HALIGN16 3 + uint32_t SurfaceHorizontalAlignment; +#define LINEAR 0 +#define WMAJOR 1 +#define XMAJOR 2 +#define YMAJOR 3 + uint32_t TileMode; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; + uint32_t SamplerL2BypassModeDisable; +#define WriteOnlyCache 0 +#define ReadWriteCache 1 + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnablePositiveZ; + uint32_t CubeFaceEnableNegativeZ; + uint32_t CubeFaceEnablePositiveY; + uint32_t CubeFaceEnableNegativeY; + uint32_t CubeFaceEnablePositiveX; + uint32_t CubeFaceEnableNegativeX; + uint32_t MemoryObjectControlState; + uint32_t BaseMipLevel; + uint32_t SurfaceQPitch; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define _0DEG 0 +#define _90DEG 1 +#define _270DEG 3 + uint32_t RenderTargetAndSampleUnormRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSS 0 +#define DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_2 1 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t XOffset; + uint32_t YOffset; + uint32_t EWADisableForCube; +#define GPUcoherent 0 +#define IAcoherent 1 + uint32_t CoherencyType; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t AuxiliarySurfaceQPitch; + uint32_t AuxiliarySurfacePitch; +#define AUX_NONE 0 +#define AUX_MCS 1 +#define AUX_APPEND 2 +#define AUX_HIZ 3 + uint32_t AuxiliarySurfaceMode; + uint32_t SeparateUVPlaneEnable; + uint32_t XOffsetforUorUVPlane; + uint32_t YOffsetforUorUVPlane; + uint32_t RedClearColor; + uint32_t GreenClearColor; + uint32_t BlueClearColor; + uint32_t AlphaClearColor; + uint32_t ShaderChannelSelectRed; + uint32_t ShaderChannelSelectGreen; + uint32_t ShaderChannelSelectBlue; + uint32_t ShaderChannelSelectAlpha; + uint32_t ResourceMinLOD; + __gen_address_type SurfaceBaseAddress; + uint32_t XOffsetforVPlane; + uint32_t YOffsetforVPlane; + uint32_t AuxiliaryTableIndexforMediaCompressedSurface; + __gen_address_type AuxiliarySurfaceBaseAddress; +}; + +static inline void +GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | + __gen_field(values->TileMode, 12, 13) | + __gen_field(values->VerticalLineStride, 11, 11) | + __gen_field(values->VerticalLineStrideOffset, 10, 10) | + __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | + __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | + __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | + __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | + __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | + __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | + 0; + + dw[1] = + /* Struct MemoryObjectControlState: found MEMORY_OBJECT_CONTROL_STATE */ + __gen_field(values->BaseMipLevel, 19, 23) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 21, 23) | + __gen_field(values->EWADisableForCube, 20, 20) | + __gen_field(values->CoherencyType, 14, 14) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | + __gen_field(values->AuxiliarySurfacePitch, 3, 11) | + __gen_field(values->AuxiliarySurfaceMode, 0, 2) | + __gen_field(values->SeparateUVPlaneEnable, 31, 31) | + __gen_field(values->XOffsetforUorUVPlane, 16, 29) | + __gen_field(values->YOffsetforUorUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ShaderChannelSelectRed, 25, 27) | + __gen_field(values->ShaderChannelSelectGreen, 22, 24) | + __gen_field(values->ShaderChannelSelectBlue, 19, 21) | + __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | + __gen_field(values->ResourceMinLOD, 0, 11) | + 0; + + uint32_t dw8 = + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + + uint32_t dw10 = + __gen_field(values->XOffsetforVPlane, 48, 61) | + __gen_field(values->YOffsetforVPlane, 32, 45) | + __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | + 0; + + dw[10] = + __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + + dw[12] = + 0; + + dw[13] = + 0; + + dw[14] = + 0; + + dw[15] = + 0; + +} + +struct GEN8_SAMPLER_STATE { + uint32_t SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define CLAMP_NONE 0 +#define CLAMP_OGL 2 + uint32_t LODPreClampMode; + uint32_t BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + uint32_t MinLOD; + uint32_t MaxLOD; + uint32_t ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t IndirectStatePointer; +#define MIPNONE 0 +#define MIPFILTER 1 + uint32_t LODClampMagnificationMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + uint32_t RAddressMinFilterRoundingEnable; + uint32_t RAddressMagFilterRoundingEnable; + uint32_t VAddressMinFilterRoundingEnable; + uint32_t VAddressMagFilterRoundingEnable; + uint32_t UAddressMinFilterRoundingEnable; + uint32_t UAddressMagFilterRoundingEnable; +#define FULL 0 +#define HIGH 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + uint32_t NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampMode, 27, 28) | + __gen_field(values->BaseMipLevel, 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD, 20, 31) | + __gen_field(values->MaxLOD, 8, 19) | + __gen_field(values->ChromaKeyEnable, 7, 7) | + __gen_field(values->ChromaKeyIndex, 5, 6) | + __gen_field(values->ChromaKeyMode, 4, 4) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_field(values->IndirectStatePointer, 6, 23) | + __gen_field(values->LODClampMagnificationMode, 0, 0) | + 0; + + dw[3] = + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#ifndef _3DPRIM_POINTLIST +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LISTSTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 +#endif + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_PID 7 + +/* Enum WRAP_SHORTEST_ENABLE */ +#define X 1 +#define Y 2 +#define XY 3 +#define Z 4 +#define XZ 5 +#define YZ 6 +#define XYZ 7 +#define W 8 +#define XW 9 +#define YW 10 +#define XYW 11 +#define ZW 12 +#define XZW 13 +#define YZW 14 +#define XYZW 15 + +/* Enum 3D_Stencil_Operation */ +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + +/* Enum 3D_Color_Buffer_Blend_Factor */ +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + +/* Enum 3D_Color_Buffer_Blend_Function */ +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum 3D_Logic_Op_Function */ +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Shader Channel Select */ +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + +/* Enum Clear Color */ +#define CC_ZERO 0 +#define CC_ONE 1 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 +#define TCM_HALF_BORDER 6 + diff --git a/src/vulkan/image.c b/src/vulkan/image.c new file mode 100644 index 00000000000..a5357198225 --- /dev/null +++ b/src/vulkan/image.c @@ -0,0 +1,404 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +// Image functions + +static const struct anv_format anv_formats[] = { + [VK_FORMAT_UNDEFINED] = { .format = RAW }, + // [VK_FORMAT_R4G4_UNORM] = { .format = R4G4_UNORM }, + // [VK_FORMAT_R4G4_USCALED] = { .format = R4G4_USCALED }, + // [VK_FORMAT_R4G4B4A4_UNORM] = { .format = R4G4B4A4_UNORM }, + // [VK_FORMAT_R4G4B4A4_USCALED] = { .format = R4G4B4A4_USCALED }, + // [VK_FORMAT_R5G6B5_UNORM] = { .format = R5G6B5_UNORM }, + // [VK_FORMAT_R5G6B5_USCALED] = { .format = R5G6B5_USCALED }, + // [VK_FORMAT_R5G5B5A1_UNORM] = { .format = R5G5B5A1_UNORM }, + // [VK_FORMAT_R5G5B5A1_USCALED] = { .format = R5G5B5A1_USCALED }, + [VK_FORMAT_R8_UNORM] = { .format = R8_UNORM, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SNORM] = { .format = R8_SNORM, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_USCALED] = { .format = R8_USCALED, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SSCALED] = { .format = R8_SSCALED, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_UINT] = { .format = R8_UINT, .cpp = 1, .channels = 1 }, + [VK_FORMAT_R8_SINT] = { .format = R8_SINT, .cpp = 1, .channels = 1 }, + // [VK_FORMAT_R8_SRGB] = { .format = R8_SRGB, .cpp = 1 }, + [VK_FORMAT_R8G8_UNORM] = { .format = R8G8_UNORM, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SNORM] = { .format = R8G8_SNORM, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_USCALED] = { .format = R8G8_USCALED, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SSCALED] = { .format = R8G8_SSCALED, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_UINT] = { .format = R8G8_UINT, .cpp = 2, .channels = 2 }, + [VK_FORMAT_R8G8_SINT] = { .format = R8G8_SINT, .cpp = 2, .channels = 2 }, + // [VK_FORMAT_R8G8_SRGB] = { .format = R8G8_SRGB }, + [VK_FORMAT_R8G8B8_UNORM] = { .format = R8G8B8X8_UNORM, .cpp = 3, .channels = 3 }, + // [VK_FORMAT_R8G8B8_SNORM] = { .format = R8G8B8X8_SNORM, .cpp = 4 }, + [VK_FORMAT_R8G8B8_USCALED] = { .format = R8G8B8_USCALED, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_SSCALED] = { .format = R8G8B8_SSCALED, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_UINT] = { .format = R8G8B8_UINT, .cpp = 3, .channels = 3 }, + [VK_FORMAT_R8G8B8_SINT] = { .format = R8G8B8_SINT, .cpp = 3, .channels = 3 }, + // [VK_FORMAT_R8G8B8_SRGB] = { .format = R8G8B8_SRGB }, + [VK_FORMAT_R8G8B8A8_UNORM] = { .format = R8G8B8A8_UNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SNORM] = { .format = R8G8B8A8_SNORM, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_USCALED] = { .format = R8G8B8A8_USCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SSCALED] = { .format = R8G8B8A8_SSCALED, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_UINT] = { .format = R8G8B8A8_UINT, .cpp = 4, .channels = 4 }, + [VK_FORMAT_R8G8B8A8_SINT] = { .format = R8G8B8A8_SINT, .cpp = 4, .channels = 4 }, + // [VK_FORMAT_R8G8B8A8_SRGB] = { .format = R8G8B8A8_SRGB }, + // [VK_FORMAT_R10G10B10A2_UNORM] = { .format = R10G10B10A2_UNORM }, + // [VK_FORMAT_R10G10B10A2_SNORM] = { .format = R10G10B10A2_SNORM }, + // [VK_FORMAT_R10G10B10A2_USCALED] = { .format = R10G10B10A2_USCALED }, + // [VK_FORMAT_R10G10B10A2_SSCALED] = { .format = R10G10B10A2_SSCALED }, + // [VK_FORMAT_R10G10B10A2_UINT] = { .format = R10G10B10A2_UINT }, + // [VK_FORMAT_R10G10B10A2_SINT] = { .format = R10G10B10A2_SINT }, + // [VK_FORMAT_R16_UNORM] = { .format = R16_UNORM }, + // [VK_FORMAT_R16_SNORM] = { .format = R16_SNORM }, + // [VK_FORMAT_R16_USCALED] = { .format = R16_USCALED }, + // [VK_FORMAT_R16_SSCALED] = { .format = R16_SSCALED }, + // [VK_FORMAT_R16_UINT] = { .format = R16_UINT }, + // [VK_FORMAT_R16_SINT] = { .format = R16_SINT }, + [VK_FORMAT_R16_SFLOAT] = { .format = R16_FLOAT, .cpp = 2, .channels = 1 }, + // [VK_FORMAT_R16G16_UNORM] = { .format = R16G16_UNORM }, + // [VK_FORMAT_R16G16_SNORM] = { .format = R16G16_SNORM }, + // [VK_FORMAT_R16G16_USCALED] = { .format = R16G16_USCALED }, + // [VK_FORMAT_R16G16_SSCALED] = { .format = R16G16_SSCALED }, + // [VK_FORMAT_R16G16_UINT] = { .format = R16G16_UINT }, + // [VK_FORMAT_R16G16_SINT] = { .format = R16G16_SINT }, + [VK_FORMAT_R16G16_SFLOAT] = { .format = R16G16_FLOAT, .cpp = 4, .channels = 2 }, + // [VK_FORMAT_R16G16B16_UNORM] = { .format = R16G16B16_UNORM }, + // [VK_FORMAT_R16G16B16_SNORM] = { .format = R16G16B16_SNORM }, + // [VK_FORMAT_R16G16B16_USCALED] = { .format = R16G16B16_USCALED }, + // [VK_FORMAT_R16G16B16_SSCALED] = { .format = R16G16B16_SSCALED }, + // [VK_FORMAT_R16G16B16_UINT] = { .format = R16G16B16_UINT }, + // [VK_FORMAT_R16G16B16_SINT] = { .format = R16G16B16_SINT }, + [VK_FORMAT_R16G16B16_SFLOAT] = { .format = R16G16B16_FLOAT, .cpp = 6, .channels = 3 }, + // [VK_FORMAT_R16G16B16A16_UNORM] = { .format = R16G16B16A16_UNORM }, + // [VK_FORMAT_R16G16B16A16_SNORM] = { .format = R16G16B16A16_SNORM }, + // [VK_FORMAT_R16G16B16A16_USCALED] = { .format = R16G16B16A16_USCALED }, + // [VK_FORMAT_R16G16B16A16_SSCALED] = { .format = R16G16B16A16_SSCALED }, + // [VK_FORMAT_R16G16B16A16_UINT] = { .format = R16G16B16A16_UINT }, + // [VK_FORMAT_R16G16B16A16_SINT] = { .format = R16G16B16A16_SINT }, + [VK_FORMAT_R16G16B16A16_SFLOAT] = { .format = R16G16B16A16_FLOAT, .cpp = 8, .channels = 4 }, + // [VK_FORMAT_R32_UINT] = { .format = R32_UINT }, + // [VK_FORMAT_R32_SINT] = { .format = R32_SINT }, + [VK_FORMAT_R32_SFLOAT] = { .format = R32_FLOAT, .cpp = 4, .channels = 1 }, + // [VK_FORMAT_R32G32_UINT] = { .format = R32G32_UINT }, + // [VK_FORMAT_R32G32_SINT] = { .format = R32G32_SINT }, + [VK_FORMAT_R32G32_SFLOAT] = { .format = R32G32_FLOAT, .cpp = 8, .channels = 2 }, + // [VK_FORMAT_R32G32B32_UINT] = { .format = R32G32B32_UINT }, + // [VK_FORMAT_R32G32B32_SINT] = { .format = R32G32B32_SINT }, + [VK_FORMAT_R32G32B32_SFLOAT] = { .format = R32G32B32_FLOAT, .cpp = 12, .channels = 3 }, + // [VK_FORMAT_R32G32B32A32_UINT] = { .format = R32G32B32A32_UINT }, + // [VK_FORMAT_R32G32B32A32_SINT] = { .format = R32G32B32A32_SINT }, + [VK_FORMAT_R32G32B32A32_SFLOAT] = { .format = R32G32B32A32_FLOAT, .cpp = 16, .channels = 4 }, + [VK_FORMAT_R64_SFLOAT] = { .format = R64_FLOAT, .cpp = 8, .channels = 1 }, + [VK_FORMAT_R64G64_SFLOAT] = { .format = R64G64_FLOAT, .cpp = 16, .channels = 2 }, + [VK_FORMAT_R64G64B64_SFLOAT] = { .format = R64G64B64_FLOAT, .cpp = 24, .channels = 3 }, + [VK_FORMAT_R64G64B64A64_SFLOAT] = { .format = R64G64B64A64_FLOAT, .cpp = 32, .channels = 4 }, + // [VK_FORMAT_R11G11B10_UFLOAT] = { .format = R11G11B10_UFLOAT }, + // [VK_FORMAT_R9G9B9E5_UFLOAT] = { .format = R9G9B9E5_UFLOAT }, + // [VK_FORMAT_D16_UNORM] = { .format = D16_UNORM }, + // [VK_FORMAT_D24_UNORM] = { .format = D24_UNORM }, + // [VK_FORMAT_D32_SFLOAT] = { .format = D32_SFLOAT }, + // [VK_FORMAT_S8_UINT] = { .format = S8_UINT }, + // [VK_FORMAT_D16_UNORM_S8_UINT] = { .format = D16_UNORM }, + // [VK_FORMAT_D24_UNORM_S8_UINT] = { .format = D24_UNORM }, + // [VK_FORMAT_D32_SFLOAT_S8_UINT] = { .format = D32_SFLOAT }, + // [VK_FORMAT_BC1_RGB_UNORM] = { .format = BC1_RGB }, + // [VK_FORMAT_BC1_RGB_SRGB] = { .format = BC1_RGB }, + // [VK_FORMAT_BC1_RGBA_UNORM] = { .format = BC1_RGBA }, + // [VK_FORMAT_BC1_RGBA_SRGB] = { .format = BC1_RGBA }, + // [VK_FORMAT_BC2_UNORM] = { .format = BC2_UNORM }, + // [VK_FORMAT_BC2_SRGB] = { .format = BC2_SRGB }, + // [VK_FORMAT_BC3_UNORM] = { .format = BC3_UNORM }, + // [VK_FORMAT_BC3_SRGB] = { .format = BC3_SRGB }, + // [VK_FORMAT_BC4_UNORM] = { .format = BC4_UNORM }, + // [VK_FORMAT_BC4_SNORM] = { .format = BC4_SNORM }, + // [VK_FORMAT_BC5_UNORM] = { .format = BC5_UNORM }, + // [VK_FORMAT_BC5_SNORM] = { .format = BC5_SNORM }, + // [VK_FORMAT_BC6H_UFLOAT] = { .format = BC6H_UFLOAT }, + // [VK_FORMAT_BC6H_SFLOAT] = { .format = BC6H_SFLOAT }, + // [VK_FORMAT_BC7_UNORM] = { .format = BC7_UNORM }, + // [VK_FORMAT_BC7_SRGB] = { .format = BC7_SRGB }, + // [VK_FORMAT_ETC2_R8G8B8_UNORM] = { .format = ETC2_R8G8B8 }, + // [VK_FORMAT_ETC2_R8G8B8_SRGB] = { .format = ETC2_R8G8B8 }, + // [VK_FORMAT_ETC2_R8G8B8A1_UNORM] = { .format = ETC2_R8G8B8A1 }, + // [VK_FORMAT_ETC2_R8G8B8A1_SRGB] = { .format = ETC2_R8G8B8A1 }, + // [VK_FORMAT_ETC2_R8G8B8A8_UNORM] = { .format = ETC2_R8G8B8A8 }, + // [VK_FORMAT_ETC2_R8G8B8A8_SRGB] = { .format = ETC2_R8G8B8A8 }, + // [VK_FORMAT_EAC_R11_UNORM] = { .format = EAC_R11 }, + // [VK_FORMAT_EAC_R11_SNORM] = { .format = EAC_R11 }, + // [VK_FORMAT_EAC_R11G11_UNORM] = { .format = EAC_R11G11 }, + // [VK_FORMAT_EAC_R11G11_SNORM] = { .format = EAC_R11G11 }, + // [VK_FORMAT_ASTC_4x4_UNORM] = { .format = ASTC_4x4 }, + // [VK_FORMAT_ASTC_4x4_SRGB] = { .format = ASTC_4x4 }, + // [VK_FORMAT_ASTC_5x4_UNORM] = { .format = ASTC_5x4 }, + // [VK_FORMAT_ASTC_5x4_SRGB] = { .format = ASTC_5x4 }, + // [VK_FORMAT_ASTC_5x5_UNORM] = { .format = ASTC_5x5 }, + // [VK_FORMAT_ASTC_5x5_SRGB] = { .format = ASTC_5x5 }, + // [VK_FORMAT_ASTC_6x5_UNORM] = { .format = ASTC_6x5 }, + // [VK_FORMAT_ASTC_6x5_SRGB] = { .format = ASTC_6x5 }, + // [VK_FORMAT_ASTC_6x6_UNORM] = { .format = ASTC_6x6 }, + // [VK_FORMAT_ASTC_6x6_SRGB] = { .format = ASTC_6x6 }, + // [VK_FORMAT_ASTC_8x5_UNORM] = { .format = ASTC_8x5 }, + // [VK_FORMAT_ASTC_8x5_SRGB] = { .format = ASTC_8x5 }, + // [VK_FORMAT_ASTC_8x6_UNORM] = { .format = ASTC_8x6 }, + // [VK_FORMAT_ASTC_8x6_SRGB] = { .format = ASTC_8x6 }, + // [VK_FORMAT_ASTC_8x8_UNORM] = { .format = ASTC_8x8 }, + // [VK_FORMAT_ASTC_8x8_SRGB] = { .format = ASTC_8x8 }, + // [VK_FORMAT_ASTC_10x5_UNORM] = { .format = ASTC_10x5 }, + // [VK_FORMAT_ASTC_10x5_SRGB] = { .format = ASTC_10x5 }, + // [VK_FORMAT_ASTC_10x6_UNORM] = { .format = ASTC_10x6 }, + // [VK_FORMAT_ASTC_10x6_SRGB] = { .format = ASTC_10x6 }, + // [VK_FORMAT_ASTC_10x8_UNORM] = { .format = ASTC_10x8 }, + // [VK_FORMAT_ASTC_10x8_SRGB] = { .format = ASTC_10x8 }, + // [VK_FORMAT_ASTC_10x10_UNORM] = { .format = ASTC_10x10 }, + // [VK_FORMAT_ASTC_10x10_SRGB] = { .format = ASTC_10x10 }, + // [VK_FORMAT_ASTC_12x10_UNORM] = { .format = ASTC_12x10 }, + // [VK_FORMAT_ASTC_12x10_SRGB] = { .format = ASTC_12x10 }, + // [VK_FORMAT_ASTC_12x12_UNORM] = { .format = ASTC_12x12 }, + // [VK_FORMAT_ASTC_12x12_SRGB] = { .format = ASTC_12x12 }, + // [VK_FORMAT_B4G4R4A4_UNORM] = { .format = B4G4R4A4_UNORM }, + // [VK_FORMAT_B5G5R5A1_UNORM] = { .format = B5G5R5A1_UNORM }, + // [VK_FORMAT_B5G6R5_UNORM] = { .format = B5G6R5_UNORM }, + // [VK_FORMAT_B5G6R5_USCALED] = { .format = B5G6R5_USCALED }, + // [VK_FORMAT_B8G8R8_UNORM] = { .format = B8G8R8_UNORM }, + // [VK_FORMAT_B8G8R8_SNORM] = { .format = B8G8R8_SNORM }, + // [VK_FORMAT_B8G8R8_USCALED] = { .format = B8G8R8_USCALED }, + // [VK_FORMAT_B8G8R8_SSCALED] = { .format = B8G8R8_SSCALED }, + // [VK_FORMAT_B8G8R8_UINT] = { .format = B8G8R8_UINT }, + // [VK_FORMAT_B8G8R8_SINT] = { .format = B8G8R8_SINT }, + // [VK_FORMAT_B8G8R8_SRGB] = { .format = B8G8R8_SRGB }, + [VK_FORMAT_B8G8R8A8_UNORM] = { .format = B8G8R8A8_UNORM, .cpp = 4, .channels = 4 }, + // [VK_FORMAT_B8G8R8A8_SNORM] = { .format = B8G8R8A8_SNORM }, + // [VK_FORMAT_B8G8R8A8_USCALED] = { .format = B8G8R8A8_USCALED }, + // [VK_FORMAT_B8G8R8A8_SSCALED] = { .format = B8G8R8A8_SSCALED }, + // [VK_FORMAT_B8G8R8A8_UINT] = { .format = B8G8R8A8_UINT }, + // [VK_FORMAT_B8G8R8A8_SINT] = { .format = B8G8R8A8_SINT }, + // [VK_FORMAT_B8G8R8A8_SRGB] = { .format = B8G8R8A8_SRGB }, + // [VK_FORMAT_B10G10R10A2_UNORM] = { .format = B10G10R10A2_UNORM }, + // [VK_FORMAT_B10G10R10A2_SNORM] = { .format = B10G10R10A2_SNORM }, + // [VK_FORMAT_B10G10R10A2_USCALED] = { .format = B10G10R10A2_USCALED }, + // [VK_FORMAT_B10G10R10A2_SSCALED] = { .format = B10G10R10A2_SSCALED }, + // [VK_FORMAT_B10G10R10A2_UINT] = { .format = B10G10R10A2_UINT }, + // [VK_FORMAT_B10G10R10A2_SINT] = { .format = B10G10R10A2_SINT } +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +static const struct anv_tile_mode_info { + int32_t tile_width; + int32_t tile_height; +} tile_mode_info[] = { + [LINEAR] = { 1, 1 }, + [XMAJOR] = { 512, 8 }, + [YMAJOR] = { 128, 32 }, + [WMAJOR] = { 128, 32 } +}; + +VkResult VKAPI vkCreateImage( + VkDevice _device, + const VkImageCreateInfo* pCreateInfo, + VkImage* pImage) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_image *image; + const struct anv_format *format; + int32_t aligned_height; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (image == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + image->mem = NULL; + image->offset = 0; + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth > 0); + + switch (pCreateInfo->tiling) { + case VK_IMAGE_TILING_LINEAR: + image->tile_mode = LINEAR; + /* Linear depth buffers must be 64 byte aligned, which is the strictest + * requirement for all kinds of linear surfaces. + */ + image->alignment = 64; + break; + case VK_IMAGE_TILING_OPTIMAL: + image->tile_mode = YMAJOR; + image->alignment = 4096; + break; + default: + break; + } + + format = anv_format_for_vk_format(pCreateInfo->format); + image->stride = ALIGN_I32(image->extent.width * format->cpp, + tile_mode_info[image->tile_mode].tile_width); + aligned_height = ALIGN_I32(image->extent.height, + tile_mode_info[image->tile_mode].tile_height); + image->size = image->stride * aligned_height; + + *pImage = (VkImage) image; + + return VK_SUCCESS; +} + +VkResult VKAPI vkGetImageSubresourceInfo( + VkDevice device, + VkImage image, + const VkImageSubresource* pSubresource, + VkSubresourceInfoType infoType, + size_t* pDataSize, + void* pData) +{ + return VK_UNSUPPORTED; +} + +// Image view functions + +static struct anv_state +create_surface_state(struct anv_device *device, + struct anv_image *image, const struct anv_format *format) +{ + struct anv_state state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = false, + .SurfaceFormat = format->format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = image->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = 0, /* FIXME: MOCS */ + .BaseMipLevel = 0, + .SurfaceQPitch = 0, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = image->extent.depth - 1, + .SurfacePitch = image->stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0, + /* FIXME: We assume that the image must be bound at this time. */ + .SurfaceBaseAddress = { NULL, image->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, state.map, &surface_state); + + return state; +} + +VkResult VKAPI vkCreateImageView( + VkDevice _device, + const VkImageViewCreateInfo* pCreateInfo, + VkImageView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_image_view *view; + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->image = (struct anv_image *) pCreateInfo->image; + + view->surface_state = create_surface_state(device, view->image, format); + + *pView = (VkImageView) view; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateColorAttachmentView( + VkDevice _device, + const VkColorAttachmentViewCreateInfo* pCreateInfo, + VkColorAttachmentView* pView) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_color_attachment_view *view; + struct anv_image *image; + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO); + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view->image = (struct anv_image *) pCreateInfo->image; + image = view->image; + + view->surface_state = create_surface_state(device, image, format); + + *pView = (VkColorAttachmentView) view; + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateDepthStencilView( + VkDevice device, + const VkDepthStencilViewCreateInfo* pCreateInfo, + VkDepthStencilView* pView) +{ + return VK_UNSUPPORTED; +} diff --git a/src/vulkan/intel.c b/src/vulkan/intel.c new file mode 100644 index 00000000000..81bd722d3e1 --- /dev/null +++ b/src/vulkan/intel.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +#include <vulkan/vulkan_intel.h> + +VkResult VKAPI vkCreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (image == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mem; + } + + image->mem = mem; + image->offset = 0; + image->type = VK_IMAGE_TYPE_2D; + image->extent = pCreateInfo->extent; + image->tile_mode = XMAJOR; + image->stride = pCreateInfo->strideInBytes; + image->size = mem->bo.size; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = (VkDeviceMemory) mem; + *pImage = (VkImage) image; + + return VK_SUCCESS; + + fail_mem: + anv_gem_close(device, mem->bo.gem_handle); + fail: + anv_device_free(device, mem); + + return result; +} diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c new file mode 100644 index 00000000000..5ff5bb9bd68 --- /dev/null +++ b/src/vulkan/meta.c @@ -0,0 +1,140 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +void VKAPI vkCmdCopyBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ +} + +void VKAPI vkCmdCopyImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ +} + +void VKAPI vkCmdBlitImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions) +{ +} + +void VKAPI vkCmdCopyBufferToImage( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ +} + +void VKAPI vkCmdCopyImageToBuffer( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ +} + +void VKAPI vkCmdCloneImageData( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout) +{ +} + +void VKAPI vkCmdUpdateBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ +} + +void VKAPI vkCmdFillBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize fillSize, + uint32_t data) +{ +} + +void VKAPI vkCmdClearColorImage( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColor* color, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ +} + +void VKAPI vkCmdClearDepthStencil( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ +} + +void VKAPI vkCmdResolveImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageResolve* pRegions) +{ +} diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c new file mode 100644 index 00000000000..33b4f64f489 --- /dev/null +++ b/src/vulkan/pipeline.c @@ -0,0 +1,565 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "private.h" + +// Shader functions + +VkResult VKAPI vkCreateShader( + VkDevice _device, + const VkShaderCreateInfo* pCreateInfo, + VkShader* pShader) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_shader *shader; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); + + shader = anv_device_alloc(device, sizeof(*shader) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (shader == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + shader->size = pCreateInfo->codeSize; + memcpy(shader->data, pCreateInfo->pCode, shader->size); + + *pShader = (VkShader) shader; + + return VK_SUCCESS; +} + +// Pipeline functions + +static void +emit_vertex_input(struct anv_pipeline *pipeline, VkPipelineVertexInputCreateInfo *info) +{ + const uint32_t num_dwords = 1 + info->attributeCount * 2; + uint32_t *p; + bool instancing_enable[32]; + + for (uint32_t i = 0; i < info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + instancing_enable[desc->binding] = true; + break; + } + } + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN8_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->location, + .Valid = true, + .SourceElementFormat = format->format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + .InstancingEnable = instancing_enable[desc->binding], + .VertexElementIndex = i, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = info->bindingCount, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = info->bindingCount); +} + +static void +emit_ia_state(struct anv_pipeline *pipeline, VkPipelineIaStateCreateInfo *info) +{ + static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LISTSTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + }; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + .CutIndex = info->primitiveRestartIndex); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, + .PrimitiveTopologyType = vk_to_gen_primitive_type[info->topology]); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, VkPipelineRsStateCreateInfo *info) +{ + static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + }; + + static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID + }; + + static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise + }; + + static const uint32_t vk_to_gen_coordinate_origin[] = { + [VK_COORDINATE_ORIGIN_UPPER_LEFT] = UPPERLEFT, + [VK_COORDINATE_ORIGIN_LOWER_LEFT] = LOWERLEFT + }; + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .ViewportTransformEnable = true, + .TriangleStripListProvokingVertexSelect = + info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, + .LineStripListProvokingVertexSelect = + info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 1, + .TriangleFanProvokingVertexSelect = + info->provokingVertex == VK_PROVOKING_VERTEX_FIRST ? 0 : 2, + .PointWidthSource = info->programPointSize ? Vertex : State, + }; + + /* bool32_t rasterizerDiscardEnable; */ + + + GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_RASTER, + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = true, + .ViewportZClipTestEnable = info->depthClipEnable); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = + vk_to_gen_coordinate_origin[info->pointOrigin], + .NumberofSFOutputAttributes = + pipeline->wm_prog_data.num_varying_inputs); + +} + +VkResult VKAPI vkCreateGraphicsPipeline( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_pipeline *pipeline; + const struct anv_common *common; + VkPipelineShaderStageCreateInfo *shader_create_info; + VkPipelineIaStateCreateInfo *ia_info; + VkPipelineRsStateCreateInfo *rs_info; + VkPipelineVertexInputCreateInfo *vi_info; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + result = anv_batch_init(&pipeline->batch, device); + if (result != VK_SUCCESS) + goto fail; + + for (common = pCreateInfo->pNext; common; common = common->pNext) { + switch (common->sType) { + case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO: + vi_info = (VkPipelineVertexInputCreateInfo *) common; + break; + case VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO: + ia_info = (VkPipelineIaStateCreateInfo *) common; + break; + case VK_STRUCTURE_TYPE_PIPELINE_TESS_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_VP_STATE_CREATE_INFO: + break; + case VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO: + rs_info = (VkPipelineRsStateCreateInfo *) common; + break; + case VK_STRUCTURE_TYPE_PIPELINE_MS_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_CB_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_DS_STATE_CREATE_INFO: + case VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO: + shader_create_info = (VkPipelineShaderStageCreateInfo *) common; + pipeline->shaders[shader_create_info->shader.stage] = + (struct anv_shader *) shader_create_info->shader.shader; + break; + default: + break; + } + } + + pipeline->use_repclear = false; + + anv_compiler_run(device->compiler, pipeline); + + emit_vertex_input(pipeline, vi_info); + emit_ia_state(pipeline, ia_info); + emit_rs_state(pipeline, rs_info); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->wm_prog_data.barycentric_interp_modes); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + bool enable_sampling = samples > 1 ? true : false; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + .PixelPositionOffsetEnable = enable_sampling, + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_vec4 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_vec4, + .VectorMaskEnable = Vmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointer = 0, + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + //pipeline->gs_prog_data.dispatch_mode | + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + //trp_generate_blend_hw_cmds(batch, pipeline); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + /* FIXME: pointer needs to be assigned outside as it aliases + * PerThreadScratchSpace. + */ + .ScratchSpaceBasePointer = 0, + .PerThreadScratchSpace = 0, + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = ia_info->disableVertexReuse, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + uint32_t ksp0, ksp2, grf_start0, grf_start2; + + ksp2 = 0; + grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + ksp0 = pipeline->ps_simd8; + grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + ksp2 = pipeline->ps_simd16; + grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + ksp0 = pipeline->ps_simd16; + grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + .KernelStartPointer0 = ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 0, + + .ScratchSpaceBasePointer = 0, + .PerThreadScratchSpace = 0, + + .MaximumNumberofThreadsPerPSD = 64 - 2, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps); + + *pPipeline = (VkPipeline) pipeline; + + return VK_SUCCESS; + + fail: + anv_device_free(device, pipeline); + + return result; +} + +VkResult +anv_pipeline_destroy(struct anv_pipeline *pipeline) +{ + anv_compiler_free(pipeline); + anv_batch_finish(&pipeline->batch, pipeline->device); + anv_device_free(pipeline->device, pipeline); + + return VK_SUCCESS; +} + +VkResult VKAPI vkCreateGraphicsPipelineDerivative( + VkDevice device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + VkPipeline basePipeline, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkCreateComputePipeline( + VkDevice device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkStorePipeline( + VkDevice device, + VkPipeline pipeline, + size_t* pDataSize, + void* pData) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkLoadPipeline( + VkDevice device, + size_t dataSize, + const void* pData, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +VkResult VKAPI vkLoadPipelineDerivative( + VkDevice device, + size_t dataSize, + const void* pData, + VkPipeline basePipeline, + VkPipeline* pPipeline) +{ + return VK_UNSUPPORTED; +} + +// Pipeline layout functions + +VkResult VKAPI vkCreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + VkPipelineLayout* pPipelineLayout) +{ + struct anv_device *device = (struct anv_device *) _device; + struct anv_pipeline_layout *layout; + struct anv_pipeline_layout_entry *entry; + uint32_t total; + size_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + total = 0; + for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { + struct anv_descriptor_set_layout *set_layout = + (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; + for (uint32_t j = 0; j < set_layout->count; j++) + total += set_layout->total; + } + + size = sizeof(*layout) + total * sizeof(layout->entries[0]); + layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + entry = layout->entries; + for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) { + layout->stage[s].entries = entry; + + for (uint32_t i = 0; i < pCreateInfo->descriptorSetCount; i++) { + struct anv_descriptor_set_layout *set_layout = + (struct anv_descriptor_set_layout *) pCreateInfo->pSetLayouts[i]; + for (uint32_t j = 0; j < set_layout->count; j++) + if (set_layout->bindings[j].mask & (1 << s)) { + entry->type = set_layout->bindings[j].type; + entry->set = i; + entry->index = j; + entry++; + } + } + + layout->stage[s].count = entry - layout->stage[s].entries; + } + + *pPipelineLayout = (VkPipelineLayout) layout; + + return VK_SUCCESS; +} diff --git a/src/vulkan/private.h b/src/vulkan/private.h new file mode 100644 index 00000000000..31d4ab242fb --- /dev/null +++ b/src/vulkan/private.h @@ -0,0 +1,594 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <pthread.h> +#include <assert.h> +#include <i915_drm.h> + +#include "brw_device_info.h" +#include "util/macros.h" + +#define VK_PROTOTYPES +#include <vulkan/vulkan.h> + +#undef VKAPI +#define VKAPI __attribute__ ((visibility ("default"))) + +#include "brw_context.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline uint32_t +ALIGN_U32(uint32_t v, uint32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +ALIGN_I32(int32_t v, int32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +static inline VkResult +vk_error(VkResult error) +{ +#ifdef DEBUG + fprintf(stderr, "vk_error: %x\n", error); +#endif + + return error; +} + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. + */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + int gem_handle; + uint32_t index; + uint64_t offset; + uint64_t size; + + /* This field is here for the benefit of the aub dumper. It can (and for + * userptr bos it must) be set to the cpu map of the buffer. Destroying + * the bo won't clean up the mmap, it's still the responsibility of the bo + * user to do that. */ + void *map; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + uint32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + void *map; + int fd; + uint32_t size; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. + */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + uint32_t next_block; + union anv_free_list free_list; +}; + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_state { + uint32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + uint32_t next; + uint32_t current_block; + uint32_t end; +}; + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +struct anv_physical_device { + struct anv_instance * instance; + uint32_t chipset_id; + bool no_hw; + const char * path; + const char * name; + const struct brw_device_info * info; +}; + +struct anv_instance { + void * pAllocUserData; + PFN_vkAllocFunction pfnAlloc; + PFN_vkFreeFunction pfnFree; + uint32_t apiVersion; + uint32_t physicalDeviceCount; + struct anv_physical_device physicalDevice; +}; + +struct anv_device { + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + int context_id; + int fd; + bool no_hw; + bool dump_aub; + + struct anv_block_pool dyn_state_block_pool; + struct anv_state_pool dyn_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_compiler * compiler; + struct anv_aub_writer * aub_writer; + pthread_mutex_t mutex; +}; + +struct anv_queue { + struct anv_device * device; + + struct anv_state_pool * pool; + + /** + * Serial number of the most recently completed batch executed on the + * engine. + */ + struct anv_state completed_serial; + + /** + * The next batch submitted to the engine will be assigned this serial + * number. + */ + uint32_t next_serial; + + uint32_t last_collected_serial; +}; + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType); + +void +anv_device_free(struct anv_device * device, + void * mem); + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, int gem_handle); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, int gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +int anv_gem_get_aperture(struct anv_device *device, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle); +int anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +/* TODO: Remove hardcoded reloc limit. */ +#define ANV_BATCH_MAX_RELOCS 256 + +struct anv_reloc_list { + size_t num_relocs; + struct drm_i915_gem_relocation_entry relocs[ANV_BATCH_MAX_RELOCS]; + struct anv_bo * reloc_bos[ANV_BATCH_MAX_RELOCS]; +}; + +struct anv_batch { + struct anv_bo bo; + void * next; + struct anv_reloc_list cmd_relocs; + struct anv_reloc_list surf_relocs; +}; + +VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device); +void anv_batch_finish(struct anv_batch *batch, struct anv_device *device); +void anv_batch_reset(struct anv_batch *batch); +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return delta; + } else { + assert(batch->bo.map <= location && + (char *) location < (char *) batch->bo.map + batch->bo.size); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +#undef GEN8_3DSTATE_MULTISAMPLE +#include "gen8_pack.h" + +#define anv_batch_emit(batch, cmd, ...) do { \ + struct cmd __template = { \ + cmd ## _header, \ + __VA_ARGS__ \ + }; \ + void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ + cmd ## _pack(batch, __dst, &__template); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) ({ \ + struct cmd __template = { \ + cmd ## _header, \ + .DwordLength = n - cmd ## _length_bias, \ + __VA_ARGS__ \ + }; \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + cmd ## _pack(batch, __dst, &__template); \ + __dst; \ + }) + +struct anv_device_memory { + struct anv_bo bo; + VkDeviceSize map_size; + void *map; +}; + +struct anv_dynamic_vp_state { + struct anv_state sf_clip_vp; + struct anv_state cc_vp; + struct anv_state scissor; +}; + +struct anv_dynamic_rs_state { + uint32_t state_sf[GEN8_3DSTATE_SF_length]; +}; + +struct anv_dynamic_cb_state { + uint32_t blend_offset; +}; + +struct anv_descriptor_set_layout { + uint32_t total; /* total number of entries in all stages */ + uint32_t count; + struct { + VkDescriptorType type; + uint32_t mask; + } bindings[0]; +}; + +struct anv_descriptor_set { + void *descriptors[0]; +}; + +struct anv_pipeline_layout_entry { + VkDescriptorType type; + uint32_t set; + uint32_t index; +}; + +struct anv_pipeline_layout { + struct { + uint32_t count; + struct anv_pipeline_layout_entry *entries; + } stage[VK_NUM_SHADER_STAGE]; + + struct anv_pipeline_layout_entry entries[0]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + /* Set when bound */ + struct anv_device_memory * mem; + VkDeviceSize offset; +}; + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 + +#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) +#define ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY (1 << 1) +#define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) + +struct anv_cmd_buffer { + struct anv_device * device; + + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 * exec2_objects; + struct anv_bo ** exec2_bos; + bool need_reloc; + uint32_t serial; + + uint32_t bo_count; + struct anv_batch batch; + struct anv_state_stream surface_state_stream; + + /* State required while building cmd buffer */ + struct { + struct anv_buffer *buffer; + VkDeviceSize offset; + } vb[MAX_VBS]; + uint32_t vb_dirty; + uint32_t num_descriptor_sets; + struct anv_descriptor_set * descriptor_sets[MAX_SETS]; + uint32_t dirty; + struct anv_pipeline * pipeline; + struct anv_framebuffer * framebuffer; + struct anv_dynamic_rs_state * rs_state; +}; + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +void anv_aub_writer_destroy(struct anv_aub_writer *writer); + +struct anv_shader { + uint32_t size; + char data[0]; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + struct anv_shader * shaders[VK_NUM_SHADER_STAGE]; + struct anv_pipeline_layout * layout; + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + struct anv_bo vs_scratch_bo; + struct anv_bo ps_scratch_bo; + struct anv_bo gs_scratch_bo; + + uint32_t active_stages; + uint32_t program_block; + uint32_t program_next; + uint32_t vs_simd8; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t gs_vec4; + uint32_t gs_vertex_count; + + uint32_t binding_stride[MAX_VBS]; + + uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; +}; + +VkResult anv_pipeline_destroy(struct anv_pipeline *pipeline); + +struct anv_compiler *anv_compiler_create(int fd); +void anv_compiler_destroy(struct anv_compiler *compiler); +int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); +void anv_compiler_free(struct anv_pipeline *pipeline); + +struct anv_format { + uint32_t format; + int32_t cpp; + int32_t channels; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); + +struct anv_image { + VkImageType type; + VkExtent3D extent; + uint32_t tile_mode; + VkDeviceSize size; + uint32_t alignment; + int32_t stride; + + /* Set when bound */ + struct anv_device_memory * mem; + VkDeviceSize offset; +}; + +struct anv_buffer_view { + struct anv_buffer * buffer; + struct anv_state surface_state; + uint32_t offset; +}; + +struct anv_color_attachment_view { + struct anv_image * image; + struct anv_state surface_state; +}; + +struct anv_image_view { + struct anv_image * image; + struct anv_state surface_state; +}; + +struct anv_depth_stencil_view { +}; + +struct anv_framebuffer { + uint32_t color_attachment_count; + struct anv_color_attachment_view * color_attachments[MAX_RTS]; + struct anv_depth_stencil_view * depth_stencil; + + uint32_t sample_count; + uint32_t width; + uint32_t height; + uint32_t layers; +}; + +struct anv_render_pass { + VkRect render_area; +}; + + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/util.c b/src/vulkan/util.c new file mode 100644 index 00000000000..847d13b2f55 --- /dev/null +++ b/src/vulkan/util.c @@ -0,0 +1,99 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> + +#include "private.h" + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(is_power_of_two(size)); + assert(element_size < size && is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = ALIGN_U32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/vulkan/vk.c b/src/vulkan/vk.c new file mode 100644 index 00000000000..4bcb54d5e4f --- /dev/null +++ b/src/vulkan/vk.c @@ -0,0 +1,723 @@ +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> + +#define VK_PROTOTYPES +#include <vulkan/vulkan.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdarg.h> +#include <poll.h> +#include <libpng16/png.h> + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static void +write_png(char *path, int32_t width, int32_t height, int32_t stride, void *pixels) +{ + FILE *f = NULL; + png_structp png_writer = NULL; + png_infop png_info = NULL; + + uint8_t *rows[height]; + + for (int32_t y = 0; y < height; y++) + rows[y] = pixels + y * stride; + + f = fopen(path, "wb"); + fail_if(!f, "failed to open file for writing: %s", path); + + png_writer = png_create_write_struct(PNG_LIBPNG_VER_STRING, + NULL, NULL, NULL); + fail_if (!png_writer, "failed to create png writer"); + + png_info = png_create_info_struct(png_writer); + fail_if(!png_info, "failed to create png writer info"); + + png_init_io(png_writer, f); + png_set_IHDR(png_writer, png_info, + width, height, + 8, PNG_COLOR_TYPE_RGBA, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); + png_write_info(png_writer, png_info); + png_set_rows(png_writer, png_info, rows); + png_write_png(png_writer, png_info, PNG_TRANSFORM_IDENTITY, NULL); + + png_destroy_write_struct(&png_writer, &png_info); + + fclose(f); +} + +static void * +test_alloc(void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return malloc(size); +} + +static void +test_free(void* pUserData, + void* pMem) +{ + free(pMem); +} + +#define GLSL(src) "#version 330\n" #src + +static void +create_pipeline(VkDevice device, VkPipeline *pipeline, + VkPipelineLayout pipeline_layout) +{ + VkPipelineIaStateCreateInfo ia_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_IA_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .disableVertexReuse = false, + .primitiveRestartEnable = false, + .primitiveRestartIndex = 0 + }; + + static const char vs_source[] = GLSL( + layout(location = 0) in vec4 a_position; + layout(location = 1) in vec4 a_color; + layout(set = 0, index = 0) uniform block1 { + vec4 color; + } u1; + layout(set = 0, index = 1) uniform block2 { + vec4 color; + } u2; + layout(set = 1, index = 0) uniform block3 { + vec4 color; + } u3; + out vec4 v_color; + void main() + { + gl_Position = a_position; + v_color = a_color + u1.color + u2.color + u3.color; + }); + + static const char fs_source[] = GLSL( + out vec4 f_color; + in vec4 v_color; + layout(set = 0, index = 0) uniform sampler2D tex; + void main() + { + f_color = v_color + texture2D(tex, vec2(0.1, 0.1)); + }); + + VkShader vs; + vkCreateShader(device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(vs_source), + .pCode = vs_source, + .flags = 0 + }, + &vs); + + VkShader fs; + vkCreateShader(device, + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .codeSize = sizeof(fs_source), + .pCode = fs_source, + .flags = 0 + }, + &fs); + + VkPipelineShaderStageCreateInfo vs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &ia_create_info, + .shader = { + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineShaderStageCreateInfo fs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &vs_create_info, + .shader = { + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .linkConstBufferCount = 0, + .pLinkConstBufferInfo = NULL, + .pSpecializationInfo = NULL + } + }; + + VkPipelineVertexInputCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_CREATE_INFO, + .pNext = &fs_create_info, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 0, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + } + }, + .attributeCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 0 + }, + { + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 0 + } + } + }; + + VkPipelineRsStateCreateInfo rs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RS_STATE_CREATE_INFO, + .pNext = &vi_create_info, + + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }; + + vkCreateGraphicsPipeline(device, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = &rs_create_info, + .flags = 0, + .layout = pipeline_layout + }, + pipeline); + + + vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, fs); + vkDestroyObject(device, VK_OBJECT_TYPE_SHADER, vs); +} + +int main(int argc, char *argv[]) +{ + VkInstance instance; + vkCreateInstance(&(VkInstanceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pAllocCb = &(VkAllocCallbacks) { + .pUserData = NULL, + .pfnAlloc = test_alloc, + .pfnFree = test_free + }, + .pAppInfo = &(VkApplicationInfo) { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pAppName = "vk", + .apiVersion = 1 + } + }, + &instance); + + uint32_t count = 1; + VkPhysicalDevice physicalDevices[1]; + vkEnumeratePhysicalDevices(instance, &count, physicalDevices); + printf("%d physical devices\n", count); + + VkPhysicalDeviceProperties properties; + size_t size = sizeof(properties); + vkGetPhysicalDeviceInfo(physicalDevices[0], + VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES, + &size, &properties); + printf("vendor id %04x, device name %s\n", + properties.vendorId, properties.deviceName); + + VkDevice device; + vkCreateDevice(physicalDevices[0], + &(VkDeviceCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueRecordCount = 1, + .pRequestedQueues = &(VkDeviceQueueCreateInfo) { + .queueNodeIndex = 0, + .queueCount = 1 + } + }, + &device); + + VkQueue queue; + vkGetDeviceQueue(device, 0, 0, &queue); + + VkCmdBuffer cmdBuffer; + vkCreateCommandBuffer(device, + &(VkCmdBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_CREATE_INFO, + .queueNodeIndex = 0, + .flags = 0 + }, + &cmdBuffer); + + + VkDescriptorSetLayout set_layout[2]; + vkCreateDescriptorSetLayout(device, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 2, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .count = 2, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = NULL + }, + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .count = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + } + } + }, + &set_layout[0]); + + vkCreateDescriptorSetLayout(device, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 1, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .count = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = NULL + } + } + }, + &set_layout[1]); + + VkPipelineLayout pipeline_layout; + vkCreatePipelineLayout(device, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 2, + .pSetLayouts = set_layout, + }, + &pipeline_layout); + + VkPipeline pipeline; + create_pipeline(device, &pipeline, pipeline_layout); + + VkDescriptorSet set[2]; + vkAllocDescriptorSets(device, 0 /* pool */, + VK_DESCRIPTOR_SET_USAGE_STATIC, + 2, set_layout, set, &count); + + VkBuffer buffer; + vkCreateBuffer(device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = 1024, + .usage = VK_BUFFER_USAGE_GENERAL, + .flags = 0 + }, + &buffer); + + VkMemoryRequirements buffer_requirements; + size = sizeof(buffer_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &buffer_requirements); + + int32_t width = 256, height = 256; + + VkImage rt; + vkCreateImage(device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = { .width = width, .height = height, .depth = 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }, + &rt); + + VkMemoryRequirements rt_requirements; + size = sizeof(rt_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_IMAGE, rt, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &rt_requirements); + + VkBuffer vertex_buffer; + vkCreateBuffer(device, + &(VkBufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = 1024, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .flags = 0 + }, + &vertex_buffer); + + VkMemoryRequirements vb_requirements; + size = sizeof(vb_requirements); + vkGetObjectInfo(device, VK_OBJECT_TYPE_BUFFER, vertex_buffer, + VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS, + &size, &vb_requirements); + + printf("buffer size: %lu, buffer alignment: %lu\n", + buffer_requirements.size, buffer_requirements.alignment); + printf("rt size: %lu, rt alignment: %lu\n", + rt_requirements.size, rt_requirements.alignment); + printf("vb size: %lu vb alignment: %lu\n", + vb_requirements.size, vb_requirements.alignment); + + size_t mem_size = rt_requirements.size + 2048 + 16 * 16 * 4; + VkDeviceMemory mem; + vkAllocMemory(device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = mem_size, + .memProps = VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT, + .memPriority = VK_MEMORY_PRIORITY_NORMAL + }, + &mem); + + void *map; + vkMapMemory(device, mem, 0, mem_size, 0, &map); + memset(map, 192, mem_size); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + buffer, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 128); + + float color[12] = { + 0.0, 0.2, 0.0, 0.0, + 0.0, 0.0, 0.5, 0.0, + 0.0, 0.0, 0.5, 0.5 + }; + memcpy(map + 128 + 16, color, sizeof(color)); + VkBufferView buffer_view[3]; + vkCreateBufferView(device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buffer, + .viewType = VK_BUFFER_VIEW_TYPE_RAW, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = 16, + .range = 64 + }, + &buffer_view[0]); + + vkCreateBufferView(device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buffer, + .viewType = VK_BUFFER_VIEW_TYPE_RAW, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = 32, + .range = 64 + }, + &buffer_view[1]); + + vkCreateBufferView(device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = buffer, + .viewType = VK_BUFFER_VIEW_TYPE_RAW, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = 48, + .range = 64 + }, + &buffer_view[2]); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_BUFFER, + vertex_buffer, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 1024); + static const float vertex_data[] = { + /* Triangle coordinates */ + -0.5, -0.5, 0.0, 1.0, + 0.5, -0.5, 0.0, 1.0, + 0.0, 0.5, 0.0, 1.0, + /* Color */ + 1.0, 0.0, 0.0, 0.2, + }; + memcpy(map + 1024, vertex_data, sizeof(vertex_data)); + + VkDynamicVpState vp_state; + vkCreateDynamicViewportState(device, + &(VkDynamicVpStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO, + .viewportAndScissorCount = 2, + .pViewports = (VkViewport[]) { + { + .originX = 0, + .originY = 0, + .width = width, + .height = height, + .minDepth = 0, + .maxDepth = 1 + }, + { + .originX = -10, + .originY = -10, + .width = 20, + .height = 20, + .minDepth = -1, + .maxDepth = 1 + }, + }, + .pScissors = (VkRect[]) { + { { 0, 0 }, { width, height } }, + { { 10, 10 }, { 236, 236 } } + } + }, + &vp_state); + + VkDynamicRsState rs_state; + vkCreateDynamicRasterState(device, + &(VkDynamicRsStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO, + }, + &rs_state); + + /* FIXME: Need to query memory info before binding to memory */ + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, + rt, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 2048); + + const uint32_t texture_width = 16, texture_height = 16; + VkImage texture; + vkCreateImage(device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = { .width = texture_width, .height = texture_height, .depth = 1 }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, + &texture); + + VkImageView image_view; + vkCreateImageView(device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = texture, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + .minLod = 0 + }, + &image_view); + + vkQueueBindObjectMemory(queue, VK_OBJECT_TYPE_IMAGE, + texture, + 0, /* allocation index; for objects which need to bind to multiple mems */ + mem, 2048 + 256 * 256 * 4); + + vkUpdateDescriptors(device, set[0], 2, + (const void * []) { + &(VkUpdateBuffers) { + .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .arrayIndex = 0, + .binding = 0, + .count = 2, + .pBufferViews = (VkBufferViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, + .view = buffer_view[0] + }, + { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, + .view = buffer_view[1] + } + } + }, + &(VkUpdateImages) { + .sType = VK_STRUCTURE_TYPE_UPDATE_IMAGES, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .binding = 2, + .count = 1, + .pImageViews = (VkImageViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ATTACH_INFO, + .view = image_view, + .layout = VK_IMAGE_LAYOUT_GENERAL, + } + } + } + }); + + vkUpdateDescriptors(device, set[1], 1, + (const void * []) { + &(VkUpdateBuffers) { + .sType = VK_STRUCTURE_TYPE_UPDATE_BUFFERS, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .arrayIndex = 0, + .count = 1, + .pBufferViews = (VkBufferViewAttachInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_ATTACH_INFO, + .view = buffer_view[2] + } + } + } + }); + + VkColorAttachmentView view; + vkCreateColorAttachmentView(device, + &(VkColorAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COLOR_ATTACHMENT_VIEW_CREATE_INFO, + .image = rt, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + .msaaResolveImage = 0, + .msaaResolveSubResource = { 0, } + }, + &view); + + VkFramebuffer framebuffer; + vkCreateFramebuffer(device, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .colorAttachmentCount = 1, + .pColorAttachments = (VkColorAttachmentBindInfo[]) { + { + .view = view, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + } + }, + .pDepthStencilAttachment = NULL, + .sampleCount = 1, + .width = width, + .height = height, + .layers = 1 + }, + &framebuffer); + + VkRenderPass pass; + vkCreateRenderPass(device, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .renderArea = { { 0, 0 }, { width, height } }, + .colorAttachmentCount = 1, + .extent = { }, + .sampleCount = 1, + .layers = 1, + .pColorFormats = (VkFormat[]) { VK_FORMAT_R8G8B8A8_UNORM }, + .pColorLayouts = (VkImageLayout[]) { VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }, + .pColorLoadOps = (VkAttachmentLoadOp[]) { VK_ATTACHMENT_LOAD_OP_CLEAR }, + .pColorStoreOps = (VkAttachmentStoreOp[]) { VK_ATTACHMENT_STORE_OP_STORE }, + .pColorLoadClearValues = (VkClearColor[]) { + { .color = { .floatColor = { 1.0, 0.0, 0.0, 1.0 } }, .useRawValue = false } + }, + .depthStencilFormat = VK_FORMAT_UNDEFINED, + }, + &pass); + + vkBeginCommandBuffer(cmdBuffer, + &(VkCmdBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_CMD_BUFFER_BEGIN_INFO, + .flags = 0 + }); + + vkCmdBeginRenderPass(cmdBuffer, + &(VkRenderPassBegin) { + .renderPass = pass, + .framebuffer = framebuffer + }); + + vkCmdBindVertexBuffers(cmdBuffer, 0, 2, + (VkBuffer[]) { vertex_buffer, vertex_buffer }, + (VkDeviceSize[]) { 0, 3 * 4 * sizeof(float) }); + + vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + vkCmdBindDescriptorSets(cmdBuffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, 0, 1, + &set[0], 0, NULL); + vkCmdBindDescriptorSets(cmdBuffer, + VK_PIPELINE_BIND_POINT_GRAPHICS, 1, 1, + &set[1], 0, NULL); + + vkCmdBindDynamicStateObject(cmdBuffer, + VK_STATE_BIND_POINT_VIEWPORT, vp_state); + vkCmdBindDynamicStateObject(cmdBuffer, + VK_STATE_BIND_POINT_RASTER, rs_state); + + vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_TOP, buffer, 0); + vkCmdWriteTimestamp(cmdBuffer, VK_TIMESTAMP_TYPE_BOTTOM, buffer, 8); + + vkCmdDraw(cmdBuffer, 0, 3, 0, 1); + + vkCmdEndRenderPass(cmdBuffer, pass); + + vkEndCommandBuffer(cmdBuffer); + + vkQueueSubmit(queue, 1, &cmdBuffer, 0); + + vkQueueWaitIdle(queue); + + write_png("vk.png", width, height, 1024, map + 2048); + + vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, texture); + vkDestroyObject(device, VK_OBJECT_TYPE_IMAGE, rt); + vkDestroyObject(device, VK_OBJECT_TYPE_BUFFER, buffer); + vkDestroyObject(device, VK_OBJECT_TYPE_COMMAND_BUFFER, cmdBuffer); + vkDestroyObject(device, VK_OBJECT_TYPE_PIPELINE, pipeline); + + vkDestroyDevice(device); + + vkDestroyInstance(instance); + + return 0; +} |