| author | Jason Ekstrand <[email protected]> | 2015-07-30 14:59:02 -0700 |
| --- | --- | --- |
| committer | Jason Ekstrand <[email protected]> | 2015-07-30 15:00:42 -0700 |
| commit | 26ba0ad54d6bef6237abfabf5a3f572c325951d3 (patch) | |
| tree | dcda857eb102d4fdfdf86e6b72563d5b384677ee /src/vulkan/anv_cmd_buffer.c | |
| parent | e379cd9a0e2822851c1745b473521e3a49bfbdd3 (diff) | |
vk: Re-name command buffer implementation files
Previously, the command buffer implementation was split between
anv_cmd_buffer.c and anv_cmd_emit.c. However, this naming convention was
confusing because none of the Vulkan entrypoints for anv_cmd_buffer were
actually in anv_cmd_buffer.c. This changes it so that anv_cmd_buffer.c is
what you think it is and the internals are in anv_batch_chain.c.
Diffstat (limited to 'src/vulkan/anv_cmd_buffer.c')
-rw-r--r-- | src/vulkan/anv_cmd_buffer.c | 1821
1 file changed, 1160 insertions, 661 deletions
diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c index 28a3af7a9b8..3b9e67fdd0f 100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@ -31,896 +31,1395 @@ /** \file anv_cmd_buffer.c * - * This file contains functions related to anv_cmd_buffer as a data - * structure. This involves everything required to create and destroy - * the actual batch buffers as well as link them together and handle - * relocations and surface state. It specifically does *not* contain any - * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. */ -/*-----------------------------------------------------------------------* - * Functions related to anv_reloc_list - *-----------------------------------------------------------------------*/ - -static VkResult -anv_reloc_list_init_clone(struct anv_reloc_list *list, - struct anv_device *device, - const struct anv_reloc_list *other_list) +static void +anv_cmd_state_init(struct anv_cmd_state *state) { - if (other_list) { - list->num_relocs = other_list->num_relocs; - list->array_length = other_list->array_length; - } else { - list->num_relocs = 0; - list->array_length = 256; - } + state->rs_state = NULL; + state->vp_state = NULL; + state->cb_state = NULL; + state->ds_state = NULL; + memset(&state->state_vf, 0, sizeof(state->state_vf)); + memset(&state->descriptors, 0, sizeof(state->descriptors)); + + state->dirty = 0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->pipeline = NULL; + state->vp_state = NULL; + state->rs_state = NULL; + state->ds_state = NULL; +} - list->relocs = - anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); + struct anv_cmd_buffer *cmd_buffer; + VkResult result; - if (list->relocs == NULL) + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - list->reloc_bos = - anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); + cmd_buffer->device = device; - if (list->reloc_bos == NULL) { - anv_device_free(device, list->relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; - if (other_list) { - memcpy(list->relocs, other_list->relocs, - list->array_length * sizeof(*list->relocs)); - memcpy(list->reloc_bos, other_list->reloc_bos, - list->array_length * sizeof(*list->reloc_bos)); - } + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + cmd_buffer->level = pCreateInfo->level; + cmd_buffer->opt_flags = 0; + + anv_cmd_state_init(&cmd_buffer->state); + + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + + *pCmdBuffer = 
anv_cmd_buffer_to_handle(cmd_buffer); return VK_SUCCESS; + + fail: anv_device_free(device, cmd_buffer); + + return result; } -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) { - return anv_reloc_list_init_clone(list, device, NULL); + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandBuffer( + VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + anv_cmd_state_init(&cmd_buffer->state); + + return VK_SUCCESS; } void -anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) { - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = device->scratch_block_pool.size; + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] 
+ * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); } -static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, - size_t num_additional_relocs) +VkResult anv_BeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) { - if (list->num_relocs + num_additional_relocs <= list->array_length) - return VK_SUCCESS; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - size_t new_length = list->array_length * 2; - while (new_length < list->num_relocs + num_additional_relocs) - new_length *= 2; + cmd_buffer->opt_flags = pBeginInfo->flags; - struct drm_i915_gem_relocation_entry *new_relocs = - anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->renderPass); - struct anv_bo **new_reloc_bos = - anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (new_relocs == NULL) { - anv_device_free(device, new_relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* FIXME: We shouldn't be starting on the first subpass */ + anv_cmd_buffer_begin_subpass(cmd_buffer, + &cmd_buffer->state.pass->subpasses[0]); } - memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); - memcpy(new_reloc_bos, list->reloc_bos, - list->num_relocs * sizeof(*list->reloc_bos)); - - anv_device_free(device, list->relocs); - anv_device_free(device, list->reloc_bos); - - list->array_length = new_length; - list->relocs = new_relocs; - list->reloc_bos = new_reloc_bos; + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->state.current_pipeline = UINT32_MAX; return VK_SUCCESS; } -uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, - uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) { - struct drm_i915_gem_relocation_entry *entry; - int index; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; - 
anv_reloc_list_grow(list, device, 1); - /* TODO: Handle failure */ + anv_cmd_buffer_end_batch_buffer(cmd_buffer); - /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ - index = list->num_relocs++; - list->reloc_bos[index] = target_bo; - entry = &list->relocs[index]; - entry->target_handle = target_bo->gem_handle; - entry->delta = delta; - entry->offset = offset; - entry->presumed_offset = target_bo->offset; - entry->read_domains = 0; - entry->write_domain = 0; + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. + */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } - return target_bo->offset + delta; + return VK_SUCCESS; } -static void -anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, - struct anv_reloc_list *other, uint32_t offset) +void anv_CmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) { - anv_reloc_list_grow(list, device, other->num_relocs); - /* TODO: Handle failure */ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - memcpy(&list->relocs[list->num_relocs], &other->relocs[0], - other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], - other->num_relocs * sizeof(other->reloc_bos[0])); + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; - for (uint32_t i = 0; i < other->num_relocs; i++) - list->relocs[i + list->num_relocs].offset += offset; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; - list->num_relocs += other->num_relocs; + default: + assert(!"invalid bind point"); + break; + } } -/*-----------------------------------------------------------------------* - * Functions related to anv_batch - *-----------------------------------------------------------------------*/ +void anv_CmdBindDynamicViewportState( + VkCmdBuffer cmdBuffer, + VkDynamicViewportState dynamicViewportState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); -void * -anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) + cmd_buffer->state.vp_state = vp_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; +} + +void anv_CmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState) { - if (batch->next + num_dwords * 4 > batch->end) - batch->extend_cb(batch, batch->user_data); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); - void *p = batch->next; + cmd_buffer->state.rs_state = rs_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; +} - batch->next += num_dwords * 4; - assert(batch->next <= batch->end); +void anv_CmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + 
ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); - return p; + cmd_buffer->state.cb_state = cb_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; } -uint64_t -anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t delta) +void anv_CmdBindDynamicDepthStencilState( + VkCmdBuffer cmdBuffer, + VkDynamicDepthStencilState dynamicDepthStencilState) { - return anv_reloc_list_add(batch->relocs, batch->device, - location - batch->start, bo, delta); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + + cmd_buffer->state.ds_state = ds_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; } -void -anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +void anv_CmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) { - uint32_t size, offset; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; - size = other->next - other->start; - assert(size % 4 == 0); + assert(firstSet + setCount < MAX_SETS); - if (batch->next + size > batch->end) - batch->extend_cb(batch, batch->user_data); + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < setCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; - assert(batch->next + size <= batch->end); + cmd_buffer->state.descriptors[firstSet + i].set = set; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); - memcpy(batch->next, other->start, size); + assert(set_layout->num_dynamic_buffers < + ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, + pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); - offset = batch->next - batch->start; - anv_reloc_list_append(batch->relocs, batch->device, - other->relocs, offset); + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - batch->next += size; + dynamic_slot += set_layout->num_dynamic_buffers; + } } -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static VkResult -anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +void anv_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) { - VkResult result; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); - result = anv_reloc_list_init(&bbo->relocs, device); - if (result != VK_SUCCESS) - goto fail_bo_alloc; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; - *bbo_out = bbo; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} - return VK_SUCCESS; +void anv_CmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_device_free(device, bbo); + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ - return result; + assert(startBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); + } } static VkResult -anv_batch_bo_clone(struct anv_device *device, - const struct anv_batch_bo *other_bbo, - struct anv_batch_bo **bbo_out) +cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state) { - VkResult result; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_layout *layout; + uint32_t attachments, bias, size; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + if (stage == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + attachments = subpass->color_count; + } else { + bias = 0; + attachments = 0; + } - struct anv_batch_bo *bbo = - anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0; - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; + if (attachments + surface_count == 0) + return VK_SUCCESS; - result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); - if (result != VK_SUCCESS) - goto fail_bo_alloc; + size = (bias + surface_count) * sizeof(uint32_t); + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; - bbo->length = other_bbo->length; - memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; - *bbo_out = bbo; + /* This is highly annoying. The Vulkan spec puts the depth-stencil + * attachments in with the color attachments. Unfortunately, thanks to + * other aspects of the API, we cana't really saparate them before this + * point. Therefore, we have to walk all of the attachments but only + * put the color attachments into the binding table. 
+ */ + for (uint32_t a = 0; a < attachments; a++) { + const struct anv_attachment_view *attachment = + fb->attachments[subpass->color_attachments[a]]; - return VK_SUCCESS; + assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); + const struct anv_color_attachment_view *view = + (const struct anv_color_attachment_view *)attachment; - fail_bo_alloc: - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_device_free(device, bbo); + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); - return result; -} + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; -static void -anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; - bbo->relocs.num_relocs = 0; + memcpy(state.map, view->view.surface_state.map, 64); + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->view.bo, view->view.offset); + + bt_map[a] = state.offset; + } + + if (layout == NULL) + return VK_SUCCESS; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *surface_slots = + set_layout->stage[stage].surface_start; + + uint32_t start = bias + layout->set[set].surface_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { + struct anv_surface_view *view = + d->set->descriptors[surface_slots[b].index].view; + + if (!view) + continue; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + uint32_t offset; + if (surface_slots[b].dynamic_slot >= 0) { + uint32_t dynamic_offset = + d->dynamic_offsets[surface_slots[b].dynamic_slot]; + + offset = view->offset + dynamic_offset; + anv_fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, offset); + + bt_map[start + b] = state.offset; + } + } + + return VK_SUCCESS; } -static void -anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) +static VkResult +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) { - batch->start = bbo->bo.map; - batch->next = bbo->bo.map + bbo->length; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) + return VK_SUCCESS; + + uint32_t size = sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *sampler_slots = + set_layout->stage[stage].sampler_start; + + uint32_t start = layout->set[set].sampler_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { + struct anv_sampler *sampler = + d->set->descriptors[sampler_slots[b].index].sampler; + + if (!sampler) + continue; + + memcpy(state->map + (start + b) * 16, + sampler->state, sizeof(sampler->state)); + } + } + + return VK_SUCCESS; } -static void -anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) { - assert(batch->start == bbo->bo.map); - bbo->length = batch->next - batch->start; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; } static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { - anv_reloc_list_finish(&bbo->relocs, device); - anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); - anv_device_free(device, bbo); -} + uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; -static VkResult -anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, - struct list_head *new_list) -{ VkResult result = VK_SUCCESS; - - list_inithead(new_list); - - struct anv_batch_bo *prev_bbo = NULL; - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo *new_bbo; - result = anv_batch_bo_clone(device, bbo, &new_bbo); + for_each_bit(s, dirty) { + result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) break; - list_addtail(&new_bbo->link, new_list); - - if (prev_bbo) { - /* As we 
clone this list of batch_bo's, they chain one to the - * other using MI_BATCH_BUFFER_START commands. We need to fix up - * those relocations as we go. Fortunately, this is pretty easy - * as it will always be the last relocation in the list. - */ - uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; - assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); - prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; - } - - prev_bbo = new_bbo; } if (result != VK_SUCCESS) { - list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) - anv_batch_bo_destroy(bbo, device); - } + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - return result; -} + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); -static inline struct anv_batch_bo * -anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); -} + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { + result = flush_descriptor_set(cmd_buffer, s); -static inline struct anv_batch_bo * -anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); -} + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } + } -struct anv_bo * -anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; + cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; } -struct anv_reloc_list * -anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +static struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, uint32_t alignment) { - return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + memcpy(state.map, a, dwords * 4); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + + return state; } -static void -cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo *bbo) +static struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) { - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_batch_bo *current_bbo = - anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_state state; + uint32_t *p; - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. 
- */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; - anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, - GEN8_MI_BATCH_BUFFER_START_header, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { &bbo->bo, 0 }, - ); + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - anv_batch_bo_finish(current_bbo, batch); + return state; } static VkResult -anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) { - struct anv_cmd_buffer *cmd_buffer = _data; - struct anv_batch_bo *new_bbo; + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); if (result != VK_SUCCESS) return result; - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - *seen_bbo = new_bbo; + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? 
*/ + }; - cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); - list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); - anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); return VK_SUCCESS; } -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) +static void +anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_bo *surface_bo = - anv_cmd_buffer_current_surface_bo(cmd_buffer); - struct anv_state state; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; - state.offset = align_u32(cmd_buffer->surface_next, alignment); - if (state.offset + size > surface_bo->size) - return (struct anv_state) { 0 }; + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - state.map = surface_bo->map + state.offset; - state.alloc_size = size; - cmd_buffer->surface_next = state.offset + size; + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } - assert(state.offset + size <= surface_bo->size); + if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - return state; -} + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) -{ - return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - size, alignment); + cmd_buffer->state.compute_dirty = 0; } -VkResult -anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer) +static void +anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { - struct anv_batch_bo *new_bbo, *old_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; - /* Finish off the old buffer */ - old_bbo->length = cmd_buffer->surface_next; + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); - if (result != VK_SUCCESS) - return result; + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer->device); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; } - *seen_bbo = new_bbo; - cmd_buffer->surface_next = 1; + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + 
+ p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GEN8_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } - list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); - return VK_SUCCESS; -} + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } -VkResult -anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo, *surface_bbo; - struct anv_device *device = cmd_buffer->device; - VkResult result; + if (cmd_buffer->state.descriptors_dirty) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_sf, + pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_raster, + pipeline->state_raster); + } - list_inithead(&cmd_buffer->batch_bos); - list_inithead(&cmd_buffer->surface_bos); + if (cmd_buffer->state.ds_state && + (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY))) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + } - result = anv_batch_bo_create(device, &batch_bo); - if (result != VK_SUCCESS) - return result; + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + + 
anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } - list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.state_vf, pipeline->state_vf); + } - cmd_buffer->batch.device = device; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; - cmd_buffer->batch.user_data = cmd_buffer; + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} - anv_batch_bo_start(batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); +void anv_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - result = anv_batch_bo_create(device, &surface_bbo); - if (result != VK_SUCCESS) - goto fail_batch_bo; + anv_cmd_buffer_flush_state(cmd_buffer); - list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} - int success = anv_vector_init(&cmd_buffer->seen_bbos, - sizeof(struct anv_bo *), - 8 * sizeof(struct anv_bo *)); - if (!success) - goto fail_surface_bo; +void anv_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + anv_cmd_buffer_flush_state(cmd_buffer); - /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ - cmd_buffer->surface_next = 1; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} - cmd_buffer->execbuf2.objects = NULL; - cmd_buffer->execbuf2.bos = NULL; - cmd_buffer->execbuf2.array_length = 0; +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} - return VK_SUCCESS; +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = reg, + .DataDWord = imm); +} - fail_surface_bo: - anv_batch_bo_destroy(surface_bbo, device); - fail_batch_bo: - anv_batch_bo_destroy(batch_bo, device); +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void anv_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} - return result; +void anv_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); } -void -anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +void anv_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) { - struct anv_device *device = cmd_buffer->device; + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); 
+ + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} - anv_vector_finish(&cmd_buffer->seen_bbos); +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 - /* Destroy all of the batch buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_batch_bo_destroy(bbo, device); - } +void anv_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} - /* Destroy all of the surface state buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->surface_bos, link) { - anv_batch_bo_destroy(bbo, device); - } +void anv_CmdSetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} - anv_device_free(device, cmd_buffer->execbuf2.objects); - anv_device_free(device, cmd_buffer->execbuf2.bos); +void anv_CmdResetEvent( + VkCmdBuffer cmdBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); } -void -anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +void anv_CmdWaitEvents( + VkCmdBuffer cmdBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) { - struct anv_device *device = cmd_buffer->device; + stub(); +} - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); - } - assert(!list_empty(&cmd_buffer->batch_bos)); +void anv_CmdPipelineBarrier( + VkCmdBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkBool32 byRegion, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + uint32_t b, *dw; - anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), - &cmd_buffer->batch, - 
GEN8_MI_BATCH_BUFFER_START_length * 4); + struct GEN8_PIPE_CONTROL cmd = { + GEN8_PIPE_CONTROL_header, + .PostSyncOperation = NoWrite, + }; - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, device); + /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */ + + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + /* This is just what PIPE_CONTROL does */ } - assert(!list_empty(&cmd_buffer->batch_bos)); - anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) { + cmd.StallAtPixelScoreboard = true; + } - cmd_buffer->surface_next = 1; - /* Reset the list of seen buffers */ - cmd_buffer->seen_bbos.head = 0; - cmd_buffer->seen_bbos.tail = 0; + if (anv_clear_mask(&srcStageMask, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_TRANSITION_BIT)) { + cmd.CommandStreamerStallEnable = true; + } - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_batch_bo(cmd_buffer); - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); -} + if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) { + anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT"); + } -void -anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - struct anv_batch_bo *surface_bbo = - anv_cmd_buffer_current_surface_bbo(cmd_buffer); + /* On our hardware, all stages will wait for execution as needed. */ + (void)destStageMask; - if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + /* We checked all known VkPipeEventFlags. */ + anv_assert(srcStageMask == 0); - /* Round batch up to an even number of dwords. */ - if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. 
+ */ + VkMemoryOutputFlags out_flags = 0; + VkMemoryInputFlags in_flags = 0; + + for (uint32_t i = 0; i < memBarrierCount; i++) { + const struct anv_common *common = ppMemBarriers[i]; + switch (common->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: { + ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common); + out_flags |= barrier->outputMask; + in_flags |= barrier->inputMask; + break; + } + default: + unreachable("Invalid memory barrier type"); + } + } - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; - } else { - /* If this is a secondary command buffer, we need to determine the - * mode in which it will be executed with vkExecuteCommands. We - * determine this statically here so that this stays in sync with the - * actual ExecuteCommands implementation. - */ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && - (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < - ANV_CMD_BUFFER_BATCH_SIZE / 2)) { - /* If the secondary has exactly one batch buffer in its list *and* - * that batch buffer is less than half of the maximum size, we're - * probably better of simply copying it into our batch. - */ - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; - } else if (cmd_buffer->opt_flags & - VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; - - /* For chaining mode, we need to increment the number of - * relocations. This is because, when we chain, we need to add - * an MI_BATCH_BUFFER_START command. Adding this command will - * also add a relocation. In order to handle theis we'll - * increment it here and decrement it right before adding the - * MI_BATCH_BUFFER_START command. 
- */ - anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; - } else { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + for_each_bit(b, out_flags) { + switch ((VkMemoryOutputFlags)(1 << b)) { + case VK_MEMORY_OUTPUT_HOST_WRITE_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT: + cmd.DCFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT: + cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + cmd.DepthCacheFlushEnable = true; + break; + case VK_MEMORY_OUTPUT_TRANSFER_BIT: + cmd.RenderTargetCacheFlushEnable = true; + cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("Invalid memory output flag"); } } - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + for_each_bit(b, out_flags) { + switch ((VkMemoryInputFlags)(1 << b)) { + case VK_MEMORY_INPUT_HOST_READ_BIT: + break; /* FIXME: Little-core systems */ + case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT: + case VK_MEMORY_INPUT_INDEX_FETCH_BIT: + case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT: + cmd.VFCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_UNIFORM_READ_BIT: + cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_MEMORY_INPUT_SHADER_READ_BIT: + cmd.DCFlushEnable = true; + cmd.TextureCacheInvalidationEnable = true; + break; + case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT: + case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT: + break; /* XXX: Hunh? */ + case VK_MEMORY_INPUT_TRANSFER_BIT: + cmd.TextureCacheInvalidationEnable = true; + break; + } + } - surface_bbo->length = cmd_buffer->surface_next; + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length); + GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd); } -static inline VkResult -anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, - struct list_head *list) +void anv_CmdPushConstants( + VkCmdBuffer cmdBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t start, + uint32_t length, + const void* values) { - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); - if (bbo_ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + stub(); +} - *bbo_ptr = bbo; +static void +anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_depth_stencil_view *view; + + static const struct anv_depth_stencil_view null_view = + { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 }; + + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + const struct anv_attachment_view *aview = + fb->attachments[subpass->depth_stencil_attachment]; + assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL); + view = (const struct anv_depth_stencil_view *)aview; + } else { + view = &null_view; } - return VK_SUCCESS; + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER, + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = view->depth_stride > 0, + .StencilWriteEnable = view->stencil_stride > 0, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = view->depth_format, + .SurfacePitch = view->depth_stride > 0 ? 
view->depth_stride - 1 : 0,
+ .SurfaceBaseAddress = { view->bo, view->depth_offset },
+ .Height = cmd_buffer->state.framebuffer->height - 1,
+ .Width = cmd_buffer->state.framebuffer->width - 1,
+ .LOD = 0,
+ .Depth = 1 - 1,
+ .MinimumArrayElement = 0,
+ .DepthBufferObjectControlState = GEN8_MOCS,
+ .RenderTargetViewExtent = 1 - 1,
+ .SurfaceQPitch = view->depth_qpitch >> 2);
+
+ /* Disable hierarchical depth buffers. */
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
+ .StencilBufferEnable = view->stencil_stride > 0,
+ .StencilBufferObjectControlState = GEN8_MOCS,
+ .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
+ .SurfaceBaseAddress = { view->bo, view->stencil_offset },
+ .SurfaceQPitch = view->stencil_qpitch >> 2);
+
+ /* Clear the clear params. */
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
 }
 void
-anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
- struct anv_cmd_buffer *secondary)
+anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_subpass *subpass)
 {
- switch (secondary->exec_mode) {
- case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
- anv_batch_emit_batch(&primary->batch, &secondary->batch);
- break;
- case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
- struct anv_batch_bo *first_bbo =
- list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
- struct anv_batch_bo *last_bbo =
- list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
-
- anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START,
- GEN8_MI_BATCH_BUFFER_START_header,
- ._2ndLevelBatchBuffer = _1stlevelbatch,
- .AddressSpaceIndicator = ASI_PPGTT,
- .BatchBufferStartAddress = { &first_bbo->bo, 0 },
- );
-
- struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
- assert(primary->batch.start == this_bbo->bo.map);
- uint32_t offset = primary->batch.next - primary->batch.start;
-
- struct GEN8_MI_BATCH_BUFFER_START ret = {
- GEN8_MI_BATCH_BUFFER_START_header,
- ._2ndLevelBatchBuffer = _1stlevelbatch,
- .AddressSpaceIndicator = ASI_PPGTT,
- .BatchBufferStartAddress = { &this_bbo->bo, offset },
- };
- last_bbo->relocs.num_relocs++;
- GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch,
- last_bbo->bo.map + last_bbo->length,
- &ret);
-
- anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
- break;
- }
- case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
- struct list_head copy_list;
- VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
- secondary->device,
- &copy_list);
- if (result != VK_SUCCESS)
- return; /* FIXME */
+ cmd_buffer->state.subpass = subpass;
- anv_cmd_buffer_add_seen_bbos(primary, &copy_list);
+ cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
- struct anv_batch_bo *first_bbo =
- list_first_entry(&copy_list, struct anv_batch_bo, link);
- struct anv_batch_bo *last_bbo =
- list_last_entry(&copy_list, struct anv_batch_bo, link);
+ anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
+}
- cmd_buffer_chain_to_batch_bo(primary, first_bbo);
+void anv_CmdBeginRenderPass(
+ VkCmdBuffer cmdBuffer,
+ const VkRenderPassBeginInfo* pRenderPassBegin,
+ VkRenderPassContents contents)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+ ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
+ ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
- list_splicetail(&copy_list, &primary->batch_bos);
+ cmd_buffer->state.framebuffer = framebuffer;
+ cmd_buffer->state.pass = pass;
-
anv_batch_bo_continue(last_bbo, &primary->batch,
- GEN8_MI_BATCH_BUFFER_START_length * 4);
+ const VkRect2D *render_area = &pRenderPassBegin->renderArea;
- anv_cmd_buffer_emit_state_base_address(primary);
- break;
- }
- default:
- assert(!"Invalid execution mode");
- }
+ anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
+ .ClippedDrawingRectangleYMin = render_area->offset.y,
+ .ClippedDrawingRectangleXMin = render_area->offset.x,
+ .ClippedDrawingRectangleYMax =
+ render_area->offset.y + render_area->extent.height - 1,
+ .ClippedDrawingRectangleXMax =
+ render_area->offset.x + render_area->extent.width - 1,
+ .DrawingRectangleOriginY = 0,
+ .DrawingRectangleOriginX = 0);
+
+ anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
+ pRenderPassBegin->pAttachmentClearValues);
- /* Mark the surface buffer from the secondary as seen */
- anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos);
+ anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
 }
-static VkResult
-anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
- struct anv_bo *bo,
- struct anv_reloc_list *relocs)
+void anv_CmdNextSubpass(
+ VkCmdBuffer cmdBuffer,
+ VkRenderPassContents contents)
 {
- struct drm_i915_gem_exec_object2 *obj = NULL;
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- if (bo->index < cmd_buffer->execbuf2.bo_count &&
- cmd_buffer->execbuf2.bos[bo->index] == bo)
- obj = &cmd_buffer->execbuf2.objects[bo->index];
+ assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
- if (obj == NULL) {
- /* We've never seen this one before. Add it to the list and assign
- * an id that we can use later.
- */
- if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) {
- uint32_t new_len = cmd_buffer->execbuf2.objects ?
- cmd_buffer->execbuf2.array_length * 2 : 64;
-
- struct drm_i915_gem_exec_object2 *new_objects =
- anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
- 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
- if (new_objects == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
- struct anv_bo **new_bos =
- anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
- 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
- if (new_bos == NULL) {
- anv_device_free(cmd_buffer->device, new_objects);
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- }
+ anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
+}
- if (cmd_buffer->execbuf2.objects) {
- memcpy(new_objects, cmd_buffer->execbuf2.objects,
- cmd_buffer->execbuf2.bo_count * sizeof(*new_objects));
- memcpy(new_bos, cmd_buffer->execbuf2.bos,
- cmd_buffer->execbuf2.bo_count * sizeof(*new_bos));
- }
+void anv_CmdEndRenderPass(
+ VkCmdBuffer cmdBuffer)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
- cmd_buffer->execbuf2.objects = new_objects;
- cmd_buffer->execbuf2.bos = new_bos;
- cmd_buffer->execbuf2.array_length = new_len;
- }
+ /* Emit a flushing pipe control at the end of a pass. This is kind of a
+ * hack but it ensures that render targets always actually get written.
+ * Eventually, we should do flushing based on image format transitions
+ * or something of that nature.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
+ .PostSyncOperation = NoWrite,
+ .RenderTargetCacheFlushEnable = true,
+ .InstructionCacheInvalidateEnable = true,
+ .DepthCacheFlushEnable = true,
+ .VFCacheInvalidationEnable = true,
+ .TextureCacheInvalidationEnable = true,
+ .CommandStreamerStallEnable = true);
+}
- assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length);
+void anv_CmdExecuteCommands(
+ VkCmdBuffer cmdBuffer,
+ uint32_t cmdBuffersCount,
+ const VkCmdBuffer* pCmdBuffers)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);
- bo->index = cmd_buffer->execbuf2.bo_count++;
- obj = &cmd_buffer->execbuf2.objects[bo->index];
- cmd_buffer->execbuf2.bos[bo->index] = bo;
+ assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
- obj->handle = bo->gem_handle;
- obj->relocation_count = 0;
- obj->relocs_ptr = 0;
- obj->alignment = 0;
- obj->offset = bo->offset;
- obj->flags = 0;
- obj->rsvd1 = 0;
- obj->rsvd2 = 0;
- }
+ anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);
- if (relocs != NULL && obj->relocation_count == 0) {
- /* This is the first time we've ever seen a list of relocations for
- * this BO. Go ahead and set the relocations and then walk the list
- * of relocations and add them all.
- */
- obj->relocation_count = relocs->num_relocs;
- obj->relocs_ptr = (uintptr_t) relocs->relocs;
+ for (uint32_t i = 0; i < cmdBuffersCount; i++) {
+ ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
- for (size_t i = 0; i < relocs->num_relocs; i++)
- anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL);
- }
+ assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);
- return VK_SUCCESS;
+ anv_cmd_buffer_add_secondary(primary, secondary);
+ }
 }
-static void
-anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
- struct anv_reloc_list *list)
+VkResult anv_CreateCommandPool(
+ VkDevice _device,
+ const VkCmdPoolCreateInfo* pCreateInfo,
+ VkCmdPool* pCmdPool)
 {
- struct anv_bo *bo;
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_cmd_pool *pool;
- /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
- * struct drm_i915_gem_exec_object2 against the bo's current offset and if
- * all bos haven't moved it will skip relocation processing altogether.
- * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
- * value of offset so we can set it either way. For that to work we need
- * to make sure all relocs use the same presumed offset.
- */
+ pool = anv_device_alloc(device, sizeof(*pool), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (pool == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- for (size_t i = 0; i < list->num_relocs; i++) {
- bo = list->reloc_bos[i];
- if (bo->offset != list->relocs[i].presumed_offset)
- cmd_buffer->execbuf2.need_reloc = true;
+ list_inithead(&pool->cmd_buffers);
- list->relocs[i].target_handle = bo->index;
- }
+ *pCmdPool = anv_cmd_pool_to_handle(pool);
+
+ return VK_SUCCESS;
 }
-void
-anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
+VkResult anv_DestroyCommandPool(
+ VkDevice _device,
+ VkCmdPool cmdPool)
 {
- struct anv_batch *batch = &cmd_buffer->batch;
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
- cmd_buffer->execbuf2.bo_count = 0;
- cmd_buffer->execbuf2.need_reloc = false;
-
- /* First, we walk over all of the bos we've seen and add them and their
- * relocations to the validate list.
- */
- struct anv_batch_bo **bbo;
- anv_vector_foreach(bbo, &cmd_buffer->seen_bbos)
- anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs);
+ anv_ResetCommandPool(_device, cmdPool, 0);
- struct anv_batch_bo *first_batch_bo =
- list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
+ anv_device_free(device, pool);
- /* The kernel requires that the last entry in the validation list be the
- * batch buffer to execute. We can simply swap the element
- * corresponding to the first batch_bo in the chain with the last
- * element in the list.
- */
- if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
- uint32_t idx = first_batch_bo->bo.index;
-
- struct drm_i915_gem_exec_object2 tmp_obj =
- cmd_buffer->execbuf2.objects[idx];
- assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);
+ return VK_SUCCESS;
+}
- cmd_buffer->execbuf2.objects[idx] =
- cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1];
- cmd_buffer->execbuf2.bos[idx] =
- cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1];
- cmd_buffer->execbuf2.bos[idx]->index = idx;
+VkResult anv_ResetCommandPool(
+ VkDevice device,
+ VkCmdPool cmdPool,
+ VkCmdPoolResetFlags flags)
+{
+ ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
- cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj;
- cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] =
- &first_batch_bo->bo;
- first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1;
+ list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
+ &pool->cmd_buffers, pool_link) {
+ anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
 }
- /* Now we go through and fixup all of the relocation lists to point to
- * the correct indices in the object array. We have to do this after we
- * reorder the list above as some of the indices may have changed.
- */
- anv_vector_foreach(bbo, &cmd_buffer->seen_bbos)
- anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
-
- cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
- .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
- .buffer_count = cmd_buffer->execbuf2.bo_count,
- .batch_start_offset = 0,
- .batch_len = batch->next - batch->start,
- .cliprects_ptr = 0,
- .num_cliprects = 0,
- .DR1 = 0,
- .DR4 = 0,
- .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
- .rsvd1 = cmd_buffer->device->context_id,
- .rsvd2 = 0,
- };
-
- if (!cmd_buffer->execbuf2.need_reloc)
- cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC;
+ return VK_SUCCESS;
 }
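
The comments in the removed end-of-recording code above describe how a secondary command buffer is classified for later execution by vkCmdExecuteCommands. The following is a minimal standalone sketch of that decision only; enum exec_mode, struct secondary_info and choose_exec_mode are hypothetical stand-in names for illustration, not the driver's actual types.

/* Illustrative sketch only -- not the driver's real data structures. */
#include <stdbool.h>
#include <stdint.h>

enum exec_mode { EXEC_EMIT, EXEC_CHAIN, EXEC_COPY_AND_CHAIN };

struct secondary_info {
   bool     single_batch_bo;      /* exactly one batch BO in the chain */
   uint32_t batch_bo_length;      /* bytes used in that BO */
   uint32_t max_batch_size;       /* e.g. ANV_CMD_BUFFER_BATCH_SIZE */
   bool     no_simultaneous_use;  /* OPTIMIZE_NO_SIMULTANEOUS_USE_BIT set */
};

enum exec_mode
choose_exec_mode(const struct secondary_info *s)
{
   /* A small, single-BO secondary is cheapest to copy straight into the
    * primary's batch. */
   if (s->single_batch_bo && s->batch_bo_length < s->max_batch_size / 2)
      return EXEC_EMIT;

   /* If the application promises no simultaneous use, the primary can jump
    * into the secondary's own BOs; one relocation is reserved up front for
    * the MI_BATCH_BUFFER_START that returns to the primary. */
   if (s->no_simultaneous_use)
      return EXEC_CHAIN;

   /* Otherwise the secondary's batch-BO chain is cloned first and the
    * primary chains into the copy. */
   return EXEC_COPY_AND_CHAIN;
}

This mirrors the three ANV_CMD_BUFFER_EXEC_MODE_* values referenced in the removed code.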
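
Similarly, the relocation comments in the removed anv_cmd_buffer_prepare_execbuf and anv_cmd_buffer_process_relocs rely on two kernel-facing rules: the batch buffer must be the last entry in the execbuf2 validation list, and I915_EXEC_NO_RELOC is only safe when every relocation's presumed offset still matches its target BO's current offset. A minimal sketch of both rules, again with hypothetical stand-in types (fake_bo, fake_reloc), is shown below.

/* Illustrative sketch only -- simplified stand-ins, not the driver's
 * drm_i915_gem_exec_object2 / anv_bo handling. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct fake_bo {
   uint32_t index;   /* position in the validation list */
   uint64_t offset;  /* offset the BO currently occupies */
};

struct fake_reloc {
   struct fake_bo *target;
   uint64_t presumed_offset;  /* offset written into the batch */
};

/* The kernel executes the last entry in the list, so swap the batch BO
 * with whatever currently sits at the end and fix up both indices. */
void
move_batch_bo_last(struct fake_bo **list, uint32_t count, uint32_t batch_idx)
{
   if (batch_idx == count - 1)
      return;

   struct fake_bo *tmp = list[batch_idx];
   list[batch_idx] = list[count - 1];
   list[count - 1] = tmp;
   list[batch_idx]->index = batch_idx;
   list[count - 1]->index = count - 1;
}

/* I915_EXEC_NO_RELOC may only be set if no target BO has moved since its
 * relocations were written, i.e. every presumed offset is still accurate. */
bool
can_set_no_reloc(const struct fake_reloc *relocs, size_t num_relocs)
{
   for (size_t i = 0; i < num_relocs; i++) {
      if (relocs[i].target->offset != relocs[i].presumed_offset)
         return false;
   }
   return true;
}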