diff options
author | Kristian Høgsberg Kristensen <[email protected]> | 2015-08-20 22:59:19 -0700 |
---|---|---|
committer | Kristian Høgsberg Kristensen <[email protected]> | 2015-08-24 13:45:41 -0700 |
commit | f1455ffac78b1369d9c4187b7f1d36c2d96e0bab (patch) | |
tree | b49a3230e3820c62866883a6dbdbca5be8545efd /src | |
parent | 891995e55bbadf75699e659c9d5ded24419e3ad3 (diff) |
vk: Add gen7 support
With all the previous commits in place, we can now drop in support for
multiple platforms. First up is gen7 (Ivybridge).
Signed-off-by: Kristian Høgsberg Kristensen <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/vulkan/Makefile.am | 5 | ||||
-rw-r--r-- | src/vulkan/anv_cmd_buffer.c | 19 | ||||
-rw-r--r-- | src/vulkan/anv_compiler.cpp | 10 | ||||
-rw-r--r-- | src/vulkan/anv_device.c | 3 | ||||
-rw-r--r-- | src/vulkan/anv_image.c | 24 | ||||
-rw-r--r-- | src/vulkan/anv_pipeline.c | 4 | ||||
-rw-r--r-- | src/vulkan/anv_private.h | 49 | ||||
-rw-r--r-- | src/vulkan/gen7_cmd_buffer.c | 647 | ||||
-rw-r--r-- | src/vulkan/gen7_pipeline.c | 595 | ||||
-rw-r--r-- | src/vulkan/gen7_state.c | 455 | ||||
-rw-r--r-- | src/vulkan/gen8_state.c | 20 |
11 files changed, 1798 insertions, 33 deletions
diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am index 2359ffeeff1..89880b77c01 100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@ -75,7 +75,10 @@ VULKAN_SOURCES = \ anv_x11.c \ gen8_state.c \ gen8_cmd_buffer.c \ - gen8_pipeline.c + gen8_pipeline.c \ + gen7_state.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c libvulkan_la_SOURCES = \ $(VULKAN_SOURCES) \ diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 6cb98a21ca4..033c7872aaf 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -55,6 +55,8 @@ anv_cmd_state_init(struct anv_cmd_state *state) state->vp_state = NULL; state->rs_state = NULL; state->ds_state = NULL; + + state->gen7.index_buffer = NULL; } VkResult anv_CreateCommandBuffer( @@ -141,6 +143,8 @@ void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { switch (cmd_buffer->device->info.gen) { + case 7: + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); default: @@ -324,11 +328,15 @@ static void add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, struct anv_state state, struct anv_bo *bo, uint32_t offset) { - /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ - *(uint64_t *)(state.map + 8 * 4) = - anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), - cmd_buffer->device, state.offset + 8 * 4, bo, offset); + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; + *(uint32_t *)(state.map + dword * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, state.offset + dword * 4, bo, offset); } VkResult @@ -610,6 +618,9 @@ anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass) { switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; case 8: gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); break; diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index 258abfb52be..2dbf59f991e 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -258,7 +258,14 @@ really_do_vs_prog(struct brw_context *brw, return false; } - pipeline->vs_simd8 = upload_kernel(pipeline, program, program_size); + const uint32_t offset = upload_kernel(pipeline, program, program_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } ralloc_free(mem_ctx); @@ -1121,6 +1128,7 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) } else { memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; } diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c index 27a51129a74..6d2f58603b3 100644 --- a/src/vulkan/anv_device.c +++ b/src/vulkan/anv_device.c @@ -1330,6 +1330,9 @@ anv_fill_buffer_surface_state(struct anv_device *device, void *state, uint32_t offset, uint32_t range) { switch (device->info.gen) { + case 7: + gen7_fill_buffer_surface_state(state, format, offset, range); + break; case 8: gen8_fill_buffer_surface_state(state, format, offset, range); break; diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c index 51f2cf5244c..15a736c25bc 100644 --- a/src/vulkan/anv_image.c +++ b/src/vulkan/anv_image.c @@ 
-414,6 +414,9 @@ anv_image_view_init(struct anv_image_view *iview, struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { + case 7: + gen7_image_view_init(iview, device, pCreateInfo, cmd_buffer); + break; case 8: gen8_image_view_init(iview, device, pCreateInfo, cmd_buffer); break; @@ -428,15 +431,19 @@ anv_CreateImageView(VkDevice _device, VkImageView *pView) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; - switch (device->info.gen) { - case 8: - return gen8_CreateImageView(_device, pCreateInfo, pView); - default: - unreachable("unsupported gen\n"); - } -} + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_image_view_to_handle(view); + return VK_SUCCESS; +} VkResult anv_DestroyImageView(VkDevice _device, VkImageView _iview) @@ -484,6 +491,9 @@ anv_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_cmd_buffer *cmd_buffer) { switch (device->info.gen) { + case 7: + gen7_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); + break; case 8: gen8_color_attachment_view_init(aview, device, pCreateInfo, cmd_buffer); break; diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index 02343e489d1..39fcd235fa4 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -285,6 +285,8 @@ anv_graphics_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); switch (device->info.gen) { + case 7: + return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); case 8: return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); default: @@ -325,6 +327,8 @@ static VkResult anv_compute_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); switch (device->info.gen) { + case 7: + return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); case 
8: return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); default: diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 2da4c414b41..5d5ab462d1b 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -550,6 +550,12 @@ __gen_combine_address(struct anv_batch *batch, void *location, VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ } while (0) +static const struct GEN7_MEMORY_OBJECT_CONTROL_STATE GEN7_MOCS = { + .GraphicsDataTypeGFDT = 0, + .LLCCacheabilityControlLLCCC = 0, + .L3CacheabilityControlL3CC = 0 +}; + #define GEN8_MOCS { \ .MemoryTypeLLCeLLCCacheabilityControl = WB, \ .TargetCache = L3DefertoPATforLLCeLLCselection, \ @@ -570,6 +576,10 @@ struct anv_dynamic_vp_state { struct anv_dynamic_rs_state { struct { + uint32_t sf[GEN7_3DSTATE_SF_length]; + } gen7; + + struct { uint32_t sf[GEN8_3DSTATE_SF_length]; uint32_t raster[GEN8_3DSTATE_RASTER_length]; } gen8; @@ -577,6 +587,11 @@ struct anv_dynamic_rs_state { struct anv_dynamic_ds_state { struct { + uint32_t depth_stencil_state[GEN7_DEPTH_STENCIL_STATE_length]; + uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; + } gen7; + + struct { uint32_t wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; uint32_t color_calc_state[GEN8_COLOR_CALC_STATE_length]; } gen8; @@ -689,6 +704,12 @@ struct anv_cmd_state { uint32_t state_vf[GEN8_3DSTATE_VF_length]; struct anv_vertex_binding vertex_bindings[MAX_VBS]; struct anv_descriptor_set_binding descriptors[MAX_SETS]; + + struct { + struct anv_buffer * index_buffer; + uint32_t index_type; + uint32_t index_offset; + } gen7; }; struct anv_cmd_pool { @@ -793,10 +814,14 @@ anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); void 
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + void gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, struct anv_subpass *subpass); @@ -856,6 +881,7 @@ struct anv_pipeline { struct anv_state_stream program_stream; struct anv_state blend_state; uint32_t vs_simd8; + uint32_t vs_vec4; uint32_t ps_simd8; uint32_t ps_simd16; uint32_t ps_ksp0; @@ -876,6 +902,11 @@ struct anv_pipeline { uint32_t cs_right_mask; struct { + uint32_t sf[GEN7_3DSTATE_SF_length]; + uint32_t depth_stencil_state[GEN7_DEPTH_STENCIL_STATE_length]; + } gen7; + + struct { uint32_t sf[GEN8_3DSTATE_SF_length]; uint32_t vf[GEN8_3DSTATE_VF_length]; uint32_t raster[GEN8_3DSTATE_RASTER_length]; @@ -914,6 +945,11 @@ gen8_graphics_pipeline_create(VkDevice _device, const struct anv_graphics_pipeline_create_info *extra, VkPipeline *pPipeline); VkResult +gen7_compute_pipeline_create(VkDevice _device, + const VkComputePipelineCreateInfo *pCreateInfo, + VkPipeline *pPipeline); + +VkResult gen8_compute_pipeline_create(VkDevice _device, const VkComputePipelineCreateInfo *pCreateInfo, VkPipeline *pPipeline); @@ -1081,6 +1117,12 @@ void anv_image_view_init(struct anv_image_view *view, struct anv_cmd_buffer *cmd_buffer); void +gen7_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + +void gen8_image_view_init(struct anv_image_view *iview, struct anv_device *device, const VkImageViewCreateInfo* pCreateInfo, @@ -1091,6 +1133,11 @@ void anv_color_attachment_view_init(struct anv_color_attachment_view *view, const VkAttachmentViewCreateInfo* pCreateInfo, struct anv_cmd_buffer *cmd_buffer); +void gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer 
*cmd_buffer); + void gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_device *device, const VkAttachmentViewCreateInfo* pCreateInfo, @@ -1104,6 +1151,8 @@ void anv_fill_buffer_surface_state(struct anv_device *device, void *state, const struct anv_format *format, uint32_t offset, uint32_t range); +void gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range); void gen8_fill_buffer_surface_state(void *state, const struct anv_format *format, uint32_t offset, uint32_t range); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c new file mode 100644 index 00000000000..74fc60fa84c --- /dev/null +++ b/src/vulkan/gen7_cmd_buffer.c @@ -0,0 +1,647 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + + +void +gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN7_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN7_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateAccessUpperBound = { scratch_bo, scratch_bo->size }, + .GeneralStateAccessUpperBoundModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN7_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN7_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateAccessUpperBound = { &device->dynamic_state_block_pool.bo, + device->dynamic_state_block_pool.bo.size }, + .DynamicStateAccessUpperBoundModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN7_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + + .IndirectObjectAccessUpperBound = { NULL, 0xffffffff }, + .IndirectObjectAccessUpperBoundModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN7_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionAccessUpperBound = { &device->instruction_block_pool.bo, + device->instruction_block_pool.bo.size }, + .InstructionAccessUpperBoundModifyEnable = true); + + /* After re-setting the surface 
state base address, we have to do some + * cache flushing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according to the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. Instead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, flushing the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); +} + +static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, +}; + +void gen7_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + cmd_buffer->state.gen7.index_buffer = buffer; + cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; + cmd_buffer->state.gen7.index_offset = offset; +} + +static VkResult +gen7_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ + }; + + uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); + + GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + /* FIXME: figure out descriptors for gen7 */ + result = gen7_flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } + + cmd_buffer->state.compute_dirty = 0; +} + +static void +gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + 
const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN7_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN7_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, + .InstanceDataStepRate = 1 + }; + + GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty) + anv_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & + (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->gen7.sf, + pipeline->gen7.sf); + } + + if (cmd_buffer->state.dirty & + (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + pipeline->gen7.depth_stencil_state, + GEN7_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->gen7.depth_stencil_state, + pipeline->gen7.depth_stencil_state, + GEN7_DEPTH_STENCIL_STATE_length, 64); + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = state.offset); + } + + if (cmd_buffer->state.dirty & + (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.cb_state->color_calc_state, + GEN7_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = 
anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->gen7.color_calc_state, + GEN7_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->gen7.color_calc_state, + cmd_buffer->state.cb_state->color_calc_state, + GEN7_COLOR_CALC_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset); + } + + if (cmd_buffer->state.gen7.index_buffer && + cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; + uint32_t offset = cmd_buffer->state.gen7.index_offset; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + .CutIndexEnable = pipeline->primitive_restart, + .IndexFormat = cmd_buffer->state.gen7.index_type, + .MemoryObjectControlState = GEN7_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void gen7_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void gen7_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +static void +gen7_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void gen7_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + gen7_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + 
anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology); +} + +void gen7_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen7_cmd_buffer_flush_state(cmd_buffer); + + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} + +void gen7_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + gen7_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, 
GEN7_MEDIA_STATE_FLUSH); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void gen7_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + gen7_cmd_buffer_flush_compute_state(cmd_buffer); + + gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); +} + +void gen7_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + stub(); +} + +void +gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_depth_stencil_view *view; + + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + if (subpass->depth_stencil_attachment 
!= VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->depth_offset }, + .Height = cmd_buffer->state.framebuffer->height - 1, + .Width = cmd_buffer->state.framebuffer->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GEN7_MOCS, + .RenderTargetViewExtent = 1 - 1); + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER); + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER, + .StencilBufferObjectControlState = GEN7_MOCS, + .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0, + .SurfaceBaseAddress = { view->bo, view->stencil_offset }); + + /* Clear the clear params. 
*/ + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS); +} + +static void +begin_render_pass(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo* pRenderPassBegin) +{ + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DRAWING_RECTANGLE, + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); +} + +void gen7_CmdBeginRenderPass( + VkCmdBuffer cmdBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + + begin_render_pass(cmd_buffer, pRenderPassBegin); + + gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); +} + +void gen7_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); +} + +void gen7_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. 
+ * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c new file mode 100644 index 00000000000..7a54d7eebe4 --- /dev/null +++ b/src/vulkan/gen7_pipeline.c @@ -0,0 +1,595 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +static void +gen7_emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) +{ + const bool sgvs = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid; + const uint32_t element_count = info->attributeCount + (sgvs ? 1 : 0); + const uint32_t num_dwords = 1 + element_count * 2; + uint32_t *p; + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN7_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN7_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format->surface_format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + } + + if (sgvs) { + struct GEN7_VERTEX_ELEMENT_STATE element = { + .Valid = true, + /* FIXME: Do we need to provide the base vertex as component 0 here + * to support the correct base vertex ID? 
*/ + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID + }; + GEN7_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + info->attributeCount * 2], &element); + } +} + +static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise +}; + +static void +gen7_emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + + /* FIXME: Get this from pass info */ + .DepthBufferSurfaceFormat = D24_UNORM_X8_UINT, + + /* LegacyGlobalDepthBiasEnable */ + + .StatisticsEnable = true, + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ViewTransformEnable = !(extra && extra->disable_viewport), + .FrontWinding = vk_to_gen_front_face[info->frontFace], + /* bool AntiAliasingEnable; */ + + .CullMode = vk_to_gen_cullmode[info->cullMode], + + /* uint32_t LineEndCapAntialiasingRegionWidth; */ + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + + /* uint32_t MultisampleRasterizationMode; */ + /* bool LastPixelEnable; */ + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + + /* uint32_t AALineDistanceMode; */ + /* uint32_t VertexSubPixelPrecisionSelect; */ + .UsePointWidthState = !pipeline->writes_point_size, + .PointWidth = 1.0, + }; + + 
GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); +} + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + 
[VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static void +gen7_emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen7.depth_stencil_state, 0, + sizeof(pipeline->gen7.depth_stencil_state)); + return; + } + + bool has_stencil = false; /* enable if subpass has stencil? */ + + struct GEN7_DEPTH_STENCIL_STATE state = { + /* Is this what we need to do? 
*/ + .StencilBufferWriteEnable = has_stencil, + + .StencilTestEnable = info->stencilTestEnable, + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + + .DoubleSidedStencilEnable = true, + + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + + .DepthTestEnable = info->depthTestEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DepthBufferWriteEnable = info->depthWriteEnable, + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); +} + +static void +gen7_emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this. + */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + + uint32_t num_dwords = GEN7_BLEND_STATE_length; + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN7_BLEND_STATE blend_state = { + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? 
*/ + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + +#if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +#endif + + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +#if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +#endif + }; + + GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = 
_3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 +}; + +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 1024); +} + +VkResult +gen7_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, pCreateInfo, extra); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + gen7_emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + + assert(pCreateInfo->pRasterState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + + gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, 
GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + + const VkPipelineRasterStateCreateInfo *rs_info = pCreateInfo->pRasterState; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + .CullMode = vk_to_gen_cullmode[rs_info->cullMode], + .ClipEnable = true, + .APIMode = APIMODE_OGL, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ClipMode = CLIPMODE_NORMAL, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + .PixelLocation = PIXLOC_CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xff); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* The last geometry producing stage will set urb_offset and urb_length, 
+ * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ + uint32_t urb_offset = 1; + uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; + +#if 0 + /* From gen7_vs_state.c */ + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); +#endif + + if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VS, .VSFunctionEnable = false); + else + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_vec4, + .ScratchSpaceBaseOffset = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterforURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = true, + .VSFunctionEnable = true); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + + if (pipeline->gs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) { + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_GS, .GSEnable = false); + } else { + urb_offset = 1; + urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_GS, + .KernelStartPointer = pipeline->gs_vec4, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + 
.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterforURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads - 1, + /* This is in the next dword on HSW. */ + .ControlDataFormat = gs_prog_data->control_data_format, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .InstanceControl = gs_prog_data->invocations - 1, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .GSStatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderEnable = true, + .GSEnable = true); + } + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + .VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_PS, + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = 
wm_prog_data->uses_omask, + + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, + + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE : POSOFFSET_NONE, + + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + +#if 0 + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ +#endif + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult gen7_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = gen7_graphics_pipeline_create(_device, &pCreateInfos[i], + NULL, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +} + 
+VkResult gen7_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ + anv_finishme("gen7 compute pipelines are not implemented"); /* stub: always fails with VK_ERROR_UNAVAILABLE */ + + return vk_error(VK_ERROR_UNAVAILABLE); +} diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c new file mode 100644 index 00000000000..4cab54f2d36 --- /dev/null +++ b/src/vulkan/gen7_state.c @@ -0,0 +1,455 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +void +gen7_fill_buffer_surface_state(void *state, const struct anv_format *format, + uint32_t offset, uint32_t range) +{ + /* This assumes RGBA float format. */ + + uint32_t stride = 16; /* Depends on whether accessing shader is simd8 or + * vec4. 
Will need one of each for buffers that are + * used in both vec4 and simd8. */ + + uint32_t num_elements = range / stride; + + struct GEN7_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceFormat = format->surface_format, + .SurfaceVerticalAlignment = VALIGN_4, + .SurfaceHorizontalAlignment = HALIGN_4, + .TiledSurface = false, + .RenderCacheReadWriteMode = WriteOnlyCache, + .SurfaceObjectControlState = GEN7_MOCS, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .SurfaceBaseAddress = { NULL, offset }, + }; + + GEN7_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +VkResult gen7_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer_view *view; + VkResult result; + + result = anv_buffer_view_create(device, pCreateInfo, &view); + if (result != VK_SUCCESS) + return result; + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + gen7_fill_buffer_surface_state(view->view.surface_state.map, format, + view->view.offset, pCreateInfo->range); + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + 
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +VkResult gen7_CreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + + struct GEN7_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = mag_filter, + .MinModeFilter = min_filter, + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = 0, + + .BorderColorPointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .MaximumAnisotropy = max_anisotropy, + .RAddressMinFilterRoundingEnable = 0, + 
.RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + }; + + GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} +VkResult gen7_CreateDynamicRasterState( + VkDevice _device, + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + bool enable_bias = pCreateInfo->depthBias != 0.0f || + pCreateInfo->slopeScaledDepthBias != 0.0f; + + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN7_3DSTATE_SF_pack(NULL, state->gen7.sf, &sf); + + *pState = anv_dynamic_rs_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult gen7_CreateDynamicDepthStencilState( + VkDevice _device, + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct 
anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN7_DEPTH_STENCIL_STATE depth_stencil_state = { + .StencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff, + .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff, + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, state->gen7.depth_stencil_state, + &depth_stencil_state); + + struct GEN7_COLOR_CALC_STATE color_calc_state = { + .StencilReferenceValue = pCreateInfo->stencilFrontRef, + .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef + }; + + GEN7_COLOR_CALC_STATE_pack(NULL, state->gen7.color_calc_state, &color_calc_state); + + *pState = anv_dynamic_ds_state_to_handle(state); + + return VK_SUCCESS; +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t anv_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; + +void +gen7_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface_view *view = &iview->view; + struct anv_surface *surface; + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + const struct anv_image_view_info *view_type_info = + anv_image_view_info_for_vk_image_view_type(pCreateInfo->viewType); + + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + 
anv_finishme("stencil image views"); + abort(); + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + surface = &image->primary_surface; + break; + default: + unreachable(""); + break; + } + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = anv_format_for_vk_format(pCreateInfo->format); + + iview->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + + uint32_t depth = 1; + if (range->arraySize > 1) { + depth = range->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + struct GEN7_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = view_type_info->surface_type, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TiledSurface = surface->tile_mode > LINEAR, + .TileWalk = surface->tile_mode == XMAJOR ? TILEWALK_XMAJOR : TILEWALK_YMAJOR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .RenderCacheReadWriteMode = false, + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = range->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GEN7_MOCS, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. 
+ */ + .MIPCountLOD = range->mipLevels - 1, + .SurfaceMinLOD = range->baseMipLevel, + + .MCSEnable = false, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + GEN7_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +void +gen7_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface_view *view = &aview->view; + struct anv_surface *surface = &image->primary_surface; + + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->mipLevel < image->levels); + anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = anv_format_for_vk_format(pCreateInfo->format); + + aview->base.extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), + .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), + .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), + }; + + uint32_t depth = 1; + if (pCreateInfo->arraySize > 1) { + depth = pCreateInfo->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + struct 
GEN7_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = view->format->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TiledSurface = surface->tile_mode > LINEAR, + .TileWalk = surface->tile_mode == XMAJOR ? TILEWALK_XMAJOR : TILEWALK_YMAJOR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .RenderCacheReadWriteMode = WriteOnlyCache, + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = pCreateInfo->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GEN7_MOCS, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. 
+ */ + .SurfaceMinLOD = 0, + .MIPCountLOD = pCreateInfo->mipLevel, + + .MCSEnable = false, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + + }; + + GEN7_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c index f035baabf74..0d65f169231 100644 --- a/src/vulkan/gen8_state.c +++ b/src/vulkan/gen8_state.c @@ -261,26 +261,6 @@ gen8_image_view_init(struct anv_image_view *iview, GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); } -VkResult -gen8_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *view; - - view = anv_device_alloc(device, sizeof(*view), 8, - VK_SYSTEM_ALLOC_TYPE_API_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_image_view_init(view, device, pCreateInfo, NULL); - - *pView = anv_image_view_to_handle(view); - - return VK_SUCCESS; -} - void gen8_color_attachment_view_init(struct anv_color_attachment_view *aview, struct anv_device *device, |