diff options
-rw-r--r-- | src/intel/vulkan/anv_blorp.c | 3 | ||||
-rw-r--r-- | src/intel/vulkan/anv_image.c | 104 | ||||
-rw-r--r-- | src/intel/vulkan/anv_private.h | 60 | ||||
-rw-r--r-- | src/intel/vulkan/genX_cmd_buffer.c | 364 |
4 files changed, 360 insertions, 171 deletions
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 497ae6fd49f..fc3b717982f 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1758,8 +1758,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, * particular value and don't care about format or clear value. */ const struct anv_address clear_color_addr = - anv_image_get_clear_color_addr(cmd_buffer->device, image, - aspect, level); + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); surf.clear_color_addr = anv_to_blorp_address(clear_color_addr); } diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 11942d0f320..a297cc47320 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -190,46 +190,58 @@ all_formats_ccs_e_compatible(const struct gen_device_info *devinfo, * fast-clear values in non-trivial cases (e.g., outside of a render pass in * which a fast clear has occurred). * - * For the purpose of discoverability, the algorithm used to manage this buffer - * is described here. A clear value in this buffer is updated when a fast clear - * is performed on a subresource. One of two synchronization operations is - * performed in order for a following memory access to use the fast-clear - * value: - * a. Copy the value from the buffer to the surface state object used for - * reading. This is done implicitly when the value is the clear value - * predetermined to be the default in other surface state objects. This - * is currently only done explicitly for the operation below. - * b. Do (a) and use the surface state object to resolve the subresource. - * This is only done during layout transitions for decent performance. + * In order to avoid having multiple clear colors for a single plane of an + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on + * the first slice (level 0, layer 0). At the time of our testing (Jan 17, + * 2018), there were no known applications which would benefit from fast- + * clearing more than just the first slice. * - * With the above scheme, we can fast-clear whenever the hardware allows except - * for two cases in which synchronization becomes impossible or undesirable: - * * The subresource is in the GENERAL layout and is cleared to a value - * other than the special default value. + * The fast clear portion of the image is laid out in the following order: * - * Performing a synchronization operation in order to read from the - * subresource is undesirable in this case. Firstly, b) is not an option - * because a layout transition isn't required between a write and read of - * an image in the GENERAL layout. Secondly, it's undesirable to do a) - * explicitly because it would require large infrastructural changes. The - * Vulkan API supports us in deciding not to optimize this layout by - * stating that using this layout may cause suboptimal performance. NOTE: - * the auxiliary buffer must always be enabled to support a) implicitly. + * * 1 or 4 dwords (depending on hardware generation) for the clear color + * * 1 dword for the anv_fast_clear_type of the clear color + * * On gen9+, 1 dword per level and layer of the image (3D levels count + * multiple layers) in level-major order for compression state. * + * For the purpose of discoverability, the algorithm used to manage + * compression and fast-clears is described here: * - * * For the given miplevel, only some of the layers are cleared at once. + * * On a transition from UNDEFINED or PREINITIALIZED to a defined layout, + * all of the values in the fast clear portion of the image are initialized + * to default values. * - * If the user clears each layer to a different value, then tries to - * render to multiple layers at once, we have no ability to perform a - * synchronization operation in between. a) is not helpful because the - * object can only hold one clear value. b) is not an option because a - * layout transition isn't required in this case. + * * On fast-clear, the clear value is written into surface state and also + * into the buffer and the fast clear type is set appropriately. Both + * setting the fast-clear value in the buffer and setting the fast-clear + * type happen from the GPU using MI commands. + * + * * Whenever a render or blorp operation is performed with CCS_E, we call + * genX(cmd_buffer_mark_image_written) to set the compression state to + * true (which is represented by UINT32_MAX). + * + * * On pipeline barrier transitions, the worst-case transition is computed + * from the image layouts. The command streamer inspects the fast clear + * type and compression state dwords and constructs a predicate. The + * worst-case resolve is performed with the given predicate and the fast + * clear and compression state is set accordingly. + * + * See anv_layout_to_aux_usage and anv_layout_to_fast_clear_type functions for + * details on exactly what is allowed in what layouts. + * + * On gen7-9, we do not have a concept of indirect clear colors in hardware. + * In order to deal with this, we have to do some clear color management. + * + * * For LOAD_OP_LOAD at the top of a renderpass, we have to copy the clear + * value from the buffer into the surface state with MI commands. + * + * * For any blorp operations, we pass the address to the clear value into + * blorp and it knows to copy the clear color. */ static void -add_fast_clear_state_buffer(struct anv_image *image, - VkImageAspectFlagBits aspect, - uint32_t plane, - const struct anv_device *device) +add_aux_state_tracking_buffer(struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t plane, + const struct anv_device *device) { assert(image && device); assert(image->planes[plane].aux_surface.isl.size > 0 && @@ -251,20 +263,24 @@ add_fast_clear_state_buffer(struct anv_image *image, (image->planes[plane].offset + image->planes[plane].size)); } - const unsigned entry_size = anv_fast_clear_state_entry_size(device); - /* There's no padding between entries, so ensure that they're always a - * multiple of 32 bits in order to enable GPU memcpy operations. - */ - assert(entry_size % 4 == 0); + /* Clear color and fast clear type */ + unsigned state_size = device->isl_dev.ss.clear_value_size + 4; - const unsigned plane_state_size = - entry_size * anv_image_aux_levels(image, aspect); + /* We only need to track compression on CCS_E surfaces. */ + if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { + if (image->type == VK_IMAGE_TYPE_3D) { + for (uint32_t l = 0; l < image->levels; l++) + state_size += anv_minify(image->extent.depth, l) * 4; + } else { + state_size += image->levels * image->array_size * 4; + } + } image->planes[plane].fast_clear_state_offset = image->planes[plane].offset + image->planes[plane].size; - image->planes[plane].size += plane_state_size; - image->size += plane_state_size; + image->planes[plane].size += state_size; + image->size += state_size; } /** @@ -437,7 +453,7 @@ make_surface(const struct anv_device *dev, } add_surface(image, &image->planes[plane].aux_surface, plane); - add_fast_clear_state_buffer(image, aspect, plane, dev); + add_aux_state_tracking_buffer(image, aspect, plane, dev); /* For images created without MUTABLE_FORMAT_BIT set, we know that * they will always be used with the original format. In @@ -461,7 +477,7 @@ make_surface(const struct anv_device *dev, &image->planes[plane].aux_surface.isl); if (ok) { add_surface(image, &image->planes[plane].aux_surface, plane); - add_fast_clear_state_buffer(image, aspect, plane, dev); + add_aux_state_tracking_buffer(image, aspect, plane, dev); image->planes[plane].aux_usage = ISL_AUX_USAGE_MCS; } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 5f827029139..d38dd9e4220 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2533,50 +2533,58 @@ anv_image_aux_layers(const struct anv_image * const image, } } -static inline unsigned -anv_fast_clear_state_entry_size(const struct anv_device *device) -{ - assert(device); - /* Entry contents: - * +--------------------------------------------+ - * | clear value dword(s) | needs resolve dword | - * +--------------------------------------------+ - */ - - /* Ensure that the needs resolve dword is in fact dword-aligned to enable - * GPU memcpy operations. - */ - assert(device->isl_dev.ss.clear_value_size % 4 == 0); - return device->isl_dev.ss.clear_value_size + 4; -} - static inline struct anv_address anv_image_get_clear_color_addr(const struct anv_device *device, const struct anv_image *image, - VkImageAspectFlagBits aspect, - unsigned level) + VkImageAspectFlagBits aspect) { + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); return (struct anv_address) { .bo = image->planes[plane].bo, .offset = image->planes[plane].bo_offset + - image->planes[plane].fast_clear_state_offset + - anv_fast_clear_state_entry_size(device) * level, + image->planes[plane].fast_clear_state_offset, }; } static inline struct anv_address -anv_image_get_needs_resolve_addr(const struct anv_device *device, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - unsigned level) +anv_image_get_fast_clear_type_addr(const struct anv_device *device, + const struct anv_image *image, + VkImageAspectFlagBits aspect) { struct anv_address addr = - anv_image_get_clear_color_addr(device, image, aspect, level); + anv_image_get_clear_color_addr(device, image, aspect); addr.offset += device->isl_dev.ss.clear_value_size; return addr; } +static inline struct anv_address +anv_image_get_compression_state_addr(const struct anv_device *device, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t array_layer) +{ + assert(level < anv_image_aux_levels(image, aspect)); + assert(array_layer < anv_image_aux_layers(image, aspect, level)); + UNUSED uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E); + + struct anv_address addr = + anv_image_get_fast_clear_type_addr(device, image, aspect); + addr.offset += 4; /* Go past the fast clear type */ + + if (image->type == VK_IMAGE_TYPE_3D) { + for (uint32_t l = 0; l < level; l++) + addr.offset += anv_minify(image->extent.depth, l) * 4; + } else { + addr.offset += level * image->array_size * 4; + } + addr.offset += array_layer * 4; + + return addr; +} + /* Returns true if a HiZ-enabled depth buffer can be sampled from. */ static inline bool anv_can_sample_with_hiz(const struct gen_device_info * const devinfo, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index e1a4d95e965..89654c221e0 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -311,7 +311,7 @@ color_attachment_compute_aux_usage(struct anv_device * device, /* We only allow fast clears in the GENERAL layout if the auxiliary * buffer is always enabled and the fast-clear value is all 0's. See - * add_fast_clear_state_buffer() for more information. + * add_aux_state_tracking_buffer() for more information. */ if (cmd_state->pass->attachments[att].first_subpass_layout == VK_IMAGE_LAYOUT_GENERAL && @@ -407,28 +407,48 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer, #define MI_PREDICATE_SRC0 0x2400 #define MI_PREDICATE_SRC1 0x2408 -/* Manages the state of an color image subresource to ensure resolves are - * performed properly. - */ static void -genX(set_image_needs_resolve)(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - unsigned level, bool needs_resolve) +set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t level, + uint32_t base_layer, uint32_t layer_count, + bool compressed) { - assert(cmd_buffer && image); - assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); - assert(level < anv_image_aux_levels(image, aspect)); + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - /* The HW docs say that there is no way to guarantee the completion of - * the following command. We use it nevertheless because it shows no - * issues in testing is currently being used in the GL driver. - */ + /* We only have compression tracking for CCS_E */ + if (image->planes[plane].aux_usage != ISL_AUX_USAGE_CCS_E) + return; + + for (uint32_t a = 0; a < layer_count; a++) { + uint32_t layer = base_layer + a; + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = anv_image_get_compression_state_addr(cmd_buffer->device, + image, aspect, + level, layer); + sdi.ImmediateData = compressed ? UINT32_MAX : 0; + } + } +} + +static void +set_image_fast_clear_state(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + enum anv_fast_clear_type fast_clear) +{ anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { - sdi.Address = anv_image_get_needs_resolve_addr(cmd_buffer->device, - image, aspect, level); - sdi.ImmediateData = needs_resolve; + sdi.Address = anv_image_get_fast_clear_type_addr(cmd_buffer->device, + image, aspect); + sdi.ImmediateData = fast_clear; } + + /* Whenever we have fast-clear, we consider that slice to be compressed. + * This makes building predicates much easier. + */ + if (fast_clear != ANV_FAST_CLEAR_NONE) + set_image_compressed_bit(cmd_buffer, image, aspect, 0, 0, 1, true); } #if GEN_IS_HASWELL || GEN_GEN >= 8 @@ -451,32 +471,169 @@ mi_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2) #define CS_GPR(n) (0x2600 + (n) * 8) static void -genX(load_needs_resolve_predicate)(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - unsigned level) +anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t level, uint32_t array_layer, + enum isl_aux_op resolve_op, + enum anv_fast_clear_type fast_clear_supported) { - assert(cmd_buffer && image); - assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); - assert(level < anv_image_aux_levels(image, aspect)); + const uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + struct anv_address fast_clear_type_addr = + anv_image_get_fast_clear_type_addr(cmd_buffer->device, image, aspect); + +#if GEN_GEN >= 9 + /* Name some registers */ + const int image_fc_reg = MI_ALU_REG0; + const int fc_imm_reg = MI_ALU_REG1; + const int pred_reg = MI_ALU_REG2; + + uint32_t *dw; + + if (resolve_op == ISL_AUX_OP_FULL_RESOLVE) { + /* In this case, we're doing a full resolve which means we want the + * resolve to happen if any compression (including fast-clears) is + * present. + * + * In order to simplify the logic a bit, we make the assumption that, + * if the first slice has been fast-cleared, it is also marked as + * compressed. See also set_image_fast_clear_state. + */ + struct anv_address compression_state_addr = + anv_image_get_compression_state_addr(cmd_buffer->device, image, + aspect, level, array_layer); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = MI_PREDICATE_SRC0; + lrm.MemoryAddress = compression_state_addr; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = compression_state_addr; + sdi.ImmediateData = 0; + } + + if (level == 0 && array_layer == 0) { + /* If the predicate is true, we want to write 0 to the fast clear type + * and, if it's false, leave it alone. We can do this by writing + * + * clear_type = clear_type & ~predicate; + */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = CS_GPR(image_fc_reg); + lrm.MemoryAddress = fast_clear_type_addr; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_REG), lrr) { + lrr.DestinationRegisterAddress = CS_GPR(pred_reg); + lrr.SourceRegisterAddress = MI_PREDICATE_SRC0; + } + + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, image_fc_reg); + dw[2] = mi_alu(MI_ALU_LOADINV, MI_ALU_SRCB, pred_reg); + dw[3] = mi_alu(MI_ALU_AND, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, image_fc_reg, MI_ALU_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { + srm.MemoryAddress = fast_clear_type_addr; + srm.RegisterAddress = CS_GPR(image_fc_reg); + } + } + } else if (level == 0 && array_layer == 0) { + /* In this case, we are doing a partial resolve to get rid of fast-clear + * colors. We don't care about the compression state but we do care + * about how much fast clear is allowed by the final layout. + */ + assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE); + assert(fast_clear_supported < ANV_FAST_CLEAR_ANY); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = CS_GPR(image_fc_reg); + lrm.MemoryAddress = fast_clear_type_addr; + } + emit_lri(&cmd_buffer->batch, CS_GPR(image_fc_reg) + 4, 0); + + emit_lri(&cmd_buffer->batch, CS_GPR(fc_imm_reg), fast_clear_supported); + emit_lri(&cmd_buffer->batch, CS_GPR(fc_imm_reg) + 4, 0); + + /* We need to compute (fast_clear_supported < image->fast_clear). + * We do this by subtracting and storing the carry bit. + */ + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, fc_imm_reg); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, image_fc_reg); + dw[3] = mi_alu(MI_ALU_SUB, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, pred_reg, MI_ALU_CF); + + /* Store the predicate */ + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(pred_reg)); + + /* If the predicate is true, we want to write 0 to the fast clear type + * and, if it's false, leave it alone. We can do this by writing + * + * clear_type = clear_type & ~predicate; + */ + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, image_fc_reg); + dw[2] = mi_alu(MI_ALU_LOADINV, MI_ALU_SRCB, pred_reg); + dw[3] = mi_alu(MI_ALU_AND, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, image_fc_reg, MI_ALU_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { + srm.RegisterAddress = CS_GPR(image_fc_reg); + srm.MemoryAddress = fast_clear_type_addr; + } + } else { + /* In this case, we're trying to do a partial resolve on a slice that + * doesn't have clear color. There's nothing to do. + */ + assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE); + return; + } - const struct anv_address resolve_flag_addr = - anv_image_get_needs_resolve_addr(cmd_buffer->device, - image, aspect, level); +#else /* GEN_GEN <= 8 */ + assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE); + assert(fast_clear_supported != ANV_FAST_CLEAR_ANY); - /* Make the pending predicated resolve a no-op if one is not needed. - * predicate = do_resolve = resolve_flag != 0; + /* We don't support fast clears on anything other than the first slice. */ + if (level > 0 || array_layer > 0) + return; + + /* On gen8, we don't have a concept of default clear colors because we + * can't sample from CCS surfaces. It's enough to just load the fast clear + * state into the predicate register. */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = MI_PREDICATE_SRC0; + lrm.MemoryAddress = fast_clear_type_addr; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address = fast_clear_type_addr; + sdi.ImmediateData = 0; + } +#endif + + /* We use the first half of src0 for the actual predicate. Set the second + * half of src0 and all of src1 to 0 as the predicate operation will be + * doing an implicit src0 != src1. + */ + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0); emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); - emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0); - emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, - resolve_flag_addr.bo, resolve_flag_addr.offset); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { mip.LoadOperation = LOAD_LOADINV; mip.CombineOperation = COMBINE_SET; mip.CompareOperation = COMPARE_SRCS_EQUAL; } + + /* CCS_D only supports full resolves and BLORP will assert on us if we try + * to do a partial resolve on a CCS_D surface. + */ + if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE && + image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) + resolve_op = ISL_AUX_OP_FULL_RESOLVE; + + anv_image_ccs_op(cmd_buffer, image, aspect, level, + array_layer, 1, resolve_op, true); } void @@ -490,17 +647,30 @@ genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer, { /* The aspect must be exactly one of the image aspects. */ assert(_mesa_bitcount(aspect) == 1 && (aspect & image->aspects)); + + /* The only compression types with more than just fast-clears are MCS, + * CCS_E, and HiZ. With HiZ we just trust the layout and don't actually + * track the current fast-clear and compression state. This leaves us + * with just MCS and CCS_E. + */ + if (aux_usage != ISL_AUX_USAGE_CCS_E && + aux_usage != ISL_AUX_USAGE_MCS) + return; + + set_image_compressed_bit(cmd_buffer, image, aspect, + level, base_layer, layer_count, true); } static void -init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - VkImageAspectFlagBits aspect, - unsigned level) +init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect) { assert(cmd_buffer && image); assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); - assert(level < anv_image_aux_levels(image, aspect)); + + set_image_fast_clear_state(cmd_buffer, image, aspect, + ANV_FAST_CLEAR_NONE); uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); enum isl_aux_usage aux_usage = image->planes[plane].aux_usage; @@ -517,7 +687,7 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer, * values in the clear value dword(s). */ struct anv_address addr = - anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect, level); + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); unsigned i = 0; for (; i < cmd_buffer->device->isl_dev.ss.clear_value_size; i += 4) { anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { @@ -558,19 +728,17 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer, struct anv_state surface_state, const struct anv_image *image, VkImageAspectFlagBits aspect, - unsigned level, bool copy_from_surface_state) { assert(cmd_buffer && image); assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); - assert(level < anv_image_aux_levels(image, aspect)); struct anv_bo *ss_bo = &cmd_buffer->device->surface_state_pool.block_pool.bo; uint32_t ss_clear_offset = surface_state.offset + cmd_buffer->device->isl_dev.ss.clear_value_offset; const struct anv_address entry_addr = - anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect, level); + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size; if (copy_from_surface_state) { @@ -660,18 +828,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (base_layer >= anv_image_aux_layers(image, aspect, base_level)) return; - /* A transition of a 3D subresource works on all slices at a time. */ - if (image->type == VK_IMAGE_TYPE_3D) { - base_layer = 0; - layer_count = anv_minify(image->extent.depth, base_level); - } - - /* We're interested in the subresource range subset that has aux data. */ - level_count = MIN2(level_count, anv_image_aux_levels(image, aspect) - base_level); - layer_count = MIN2(layer_count, - anv_image_aux_layers(image, aspect, base_level) - base_layer); - last_level_num = base_level + level_count; - assert(image->tiling == VK_IMAGE_TILING_OPTIMAL); if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED || @@ -684,8 +840,8 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, * * Initialize the relevant clear buffer entries. */ - for (unsigned level = base_level; level < last_level_num; level++) - init_fast_clear_state_entry(cmd_buffer, image, aspect, level); + if (base_level == 0 && base_layer == 0) + init_fast_clear_color(cmd_buffer, image, aspect); /* Initialize the aux buffers to enable correct rendering. In order to * ensure that things such as storage images work correctly, aux buffers @@ -723,13 +879,18 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (image->samples == 1) { for (uint32_t l = 0; l < level_count; l++) { const uint32_t level = base_level + l; - const uint32_t level_layer_count = + uint32_t level_layer_count = MIN2(layer_count, anv_image_aux_layers(image, aspect, level)); + anv_image_ccs_op(cmd_buffer, image, aspect, level, base_layer, level_layer_count, ISL_AUX_OP_AMBIGUATE, false); - genX(set_image_needs_resolve)(cmd_buffer, image, - aspect, level, false); + + if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { + set_image_compressed_bit(cmd_buffer, image, aspect, + level, base_layer, level_layer_count, + false); + } } } else { if (image->samples == 4 || image->samples == 16) { @@ -782,13 +943,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, final_aux_usage != ISL_AUX_USAGE_CCS_E) resolve_op = ISL_AUX_OP_FULL_RESOLVE; - /* CCS_D only supports full resolves and BLORP will assert on us if we try - * to do a partial resolve on a CCS_D surface. - */ - if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE && - initial_aux_usage == ISL_AUX_USAGE_CCS_D) - resolve_op = ISL_AUX_OP_FULL_RESOLVE; - if (resolve_op == ISL_AUX_OP_NONE) return; @@ -812,19 +966,17 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; - for (uint32_t level = base_level; level < last_level_num; level++) { + for (uint32_t l = 0; l < level_count; l++) { + uint32_t level = base_level + l; + uint32_t level_layer_count = + MIN2(layer_count, anv_image_aux_layers(image, aspect, level)); - /* The number of layers changes at each 3D miplevel. */ - if (image->type == VK_IMAGE_TYPE_3D) { - layer_count = MIN2(layer_count, anv_image_aux_layers(image, aspect, level)); + for (uint32_t a = 0; a < level_layer_count; a++) { + uint32_t array_layer = base_layer + a; + anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect, + level, array_layer, resolve_op, + final_fast_clear); } - - genX(load_needs_resolve_predicate)(cmd_buffer, image, aspect, level); - - anv_image_ccs_op(cmd_buffer, image, aspect, level, - base_layer, layer_count, resolve_op, true); - - genX(set_image_needs_resolve)(cmd_buffer, image, aspect, level, false); } cmd_buffer->state.pending_pipe_bits |= @@ -1488,12 +1640,20 @@ void genX(CmdPipelineBarrier)( anv_image_expand_aspects(image, range->aspectMask); uint32_t aspect_bit; + uint32_t base_layer, layer_count; + if (image->type == VK_IMAGE_TYPE_3D) { + base_layer = 0; + layer_count = anv_minify(image->extent.depth, range->baseMipLevel); + } else { + base_layer = range->baseArrayLayer; + layer_count = anv_get_layerCount(image, range); + } + anv_foreach_image_aspect_bit(aspect_bit, image, color_aspects) { transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit, range->baseMipLevel, anv_get_levelCount(image, range), - range->baseArrayLayer, - anv_get_layerCount(image, range), + base_layer, layer_count, pImageMemoryBarriers[i].oldLayout, pImageMemoryBarriers[i].newLayout); } @@ -3152,10 +3312,20 @@ cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT, target_layout); } else if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); + + uint32_t base_layer, layer_count; + if (image->type == VK_IMAGE_TYPE_3D) { + base_layer = 0; + layer_count = anv_minify(iview->image->extent.depth, + iview->planes[0].isl.base_level); + } else { + base_layer = iview->planes[0].isl.base_array_layer; + layer_count = iview->planes[0].isl.array_len; + } + transition_color_buffer(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, iview->planes[0].isl.base_level, 1, - iview->planes[0].isl.base_array_layer, - iview->planes[0].isl.array_len, + base_layer, layer_count, att_state->current_layout, target_layout); } @@ -3203,28 +3373,26 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer) genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state, iview->image, VK_IMAGE_ASPECT_COLOR_BIT, - iview->planes[0].isl.base_level, true /* copy from ss */); /* Fast-clears impact whether or not a resolve will be necessary. */ - if (iview->image->planes[0].aux_usage == ISL_AUX_USAGE_CCS_E && - att_state->clear_color_is_zero) { - /* This image always has the auxiliary buffer enabled. We can mark - * the subresource as not needing a resolve because the clear color - * will match what's in every RENDER_SURFACE_STATE object when it's - * being used for sampling. + if (att_state->clear_color_is_zero) { + /* This image has the auxiliary buffer enabled. We can mark the + * subresource as not needing a resolve because the clear color + * will match what's in every RENDER_SURFACE_STATE object when + * it's being used for sampling. */ - genX(set_image_needs_resolve)(cmd_buffer, iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - iview->planes[0].isl.base_level, - false); + set_image_fast_clear_state(cmd_buffer, iview->image, + VK_IMAGE_ASPECT_COLOR_BIT, + ANV_FAST_CLEAR_DEFAULT_VALUE); } else { - genX(set_image_needs_resolve)(cmd_buffer, iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - iview->planes[0].isl.base_level, - true); + set_image_fast_clear_state(cmd_buffer, iview->image, + VK_IMAGE_ASPECT_COLOR_BIT, + ANV_FAST_CLEAR_ANY); } - } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { + } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD && + iview->planes[0].isl.base_level == 0 && + iview->planes[0].isl.base_array_layer == 0) { /* The attachment may have been fast-cleared in a previous render * pass and the value is needed now. Update the surface state(s). * @@ -3233,7 +3401,6 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer) genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color.state, iview->image, VK_IMAGE_ASPECT_COLOR_BIT, - iview->planes[0].isl.base_level, false /* copy to ss */); if (need_input_attachment_state(rp_att) && @@ -3241,7 +3408,6 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer) genX(copy_fast_clear_dwords)(cmd_buffer, att_state->input.state, iview->image, VK_IMAGE_ASPECT_COLOR_BIT, - iview->planes[0].isl.base_level, false /* copy to ss */); } } |