diff options
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 223 | ||||
-rw-r--r-- | src/amd/vulkan/radv_device.c | 35 | ||||
-rw-r--r-- | src/amd/vulkan/radv_extensions.py | 1 | ||||
-rw-r--r-- | src/amd/vulkan/radv_image.c | 7 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 30 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 26 |
6 files changed, 315 insertions, 7 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index e524c321832..b40113d543a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, dest->viewport.count = src->viewport.count; dest->scissor.count = src->scissor.count; dest->discard_rectangle.count = src->discard_rectangle.count; + dest->sample_location.count = src->sample_location.count; if (copy_mask & RADV_DYNAMIC_VIEWPORT) { if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, } } + if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { + if (dest->sample_location.per_pixel != src->sample_location.per_pixel || + dest->sample_location.grid_size.width != src->sample_location.grid_size.width || + dest->sample_location.grid_size.height != src->sample_location.grid_size.height || + memcmp(&dest->sample_location.locations, + &src->sample_location.locations, + src->sample_location.count * sizeof(VkSampleLocationEXT))) { + dest->sample_location.per_pixel = src->sample_location.per_pixel; + dest->sample_location.grid_size = src->sample_location.grid_size; + typed_memcpy(dest->sample_location.locations, + src->sample_location.locations, + src->sample_location.count); + dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; + } + } + cmd_buffer->state.dirty |= dest_mask; } @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, } } +/** + * Convert the user sample locations to hardware sample locations (the values + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). + */ +static void +radv_convert_user_sample_locs(struct radv_sample_locations_state *state, + uint32_t x, uint32_t y, VkOffset2D *sample_locs) +{ + uint32_t x_offset = x % state->grid_size.width; + uint32_t y_offset = y % state->grid_size.height; + uint32_t num_samples = (uint32_t)state->per_pixel; + VkSampleLocationEXT *user_locs; + uint32_t pixel_offset; + + pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples; + + assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); + user_locs = &state->locations[pixel_offset]; + + for (uint32_t i = 0; i < num_samples; i++) { + float shifted_pos_x = user_locs[i].x - 0.5; + float shifted_pos_y = user_locs[i].y - 0.5; + + int32_t scaled_pos_x = floor(shifted_pos_x * 16); + int32_t scaled_pos_y = floor(shifted_pos_y * 16); + + sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7); + sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7); + } +} + +/** + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample + * locations. + */ +static void +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, + uint32_t *sample_locs_pixel) +{ + for (uint32_t i = 0; i < num_samples; i++) { + uint32_t sample_reg_idx = i / 4; + uint32_t sample_loc_idx = i % 4; + int32_t pos_x = sample_locs[i].x; + int32_t pos_y = sample_locs[i].y; + + uint32_t shift_x = 8 * sample_loc_idx; + uint32_t shift_y = shift_x + 4; + + sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x; + sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y; + } +} + +/** + * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware + * sample locations. + */ +static uint64_t +radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, + VkOffset2D *sample_locs, + uint32_t num_samples) +{ + uint32_t centroid_priorities[num_samples]; + uint32_t sample_mask = num_samples - 1; + uint32_t distances[num_samples]; + uint64_t centroid_priority = 0; + + /* Compute the distances from center for each sample. */ + for (int i = 0; i < num_samples; i++) { + distances[i] = (sample_locs[i].x * sample_locs[i].x) + + (sample_locs[i].y * sample_locs[i].y); + } + + /* Compute the centroid priorities by looking at the distances array. */ + for (int i = 0; i < num_samples; i++) { + uint32_t min_idx = 0; + + for (int j = 1; j < num_samples; j++) { + if (distances[j] < distances[min_idx]) + min_idx = j; + } + + centroid_priorities[i] = min_idx; + distances[min_idx] = 0xffffffff; + } + + /* Compute the final centroid priority. */ + for (int i = 0; i < 8; i++) { + centroid_priority |= + centroid_priorities[i & sample_mask] << (i * 4); + } + + return centroid_priority << 32 | centroid_priority; +} + +/** + * Emit the sample locations that are specified with VK_EXT_sample_locations. + */ +static void +radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) +{ + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct radv_multisample_state *ms = &pipeline->graphics.ms; + struct radv_sample_locations_state *sample_location = + &cmd_buffer->state.dynamic.sample_location; + uint32_t num_samples = (uint32_t)sample_location->per_pixel; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t sample_locs_pixel[4][2] = {}; + VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */ + uint32_t max_sample_dist = 0; + uint64_t centroid_priority; + + if (!cmd_buffer->state.dynamic.sample_location.count) + return; + + /* Convert the user sample locations to hardware sample locations. */ + radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]); + radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]); + radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]); + radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]); + + /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */ + for (uint32_t i = 0; i < 4; i++) { + radv_compute_sample_locs_pixel(num_samples, sample_locs[i], + sample_locs_pixel[i]); + } + + /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */ + centroid_priority = + radv_compute_centroid_priority(cmd_buffer, sample_locs[0], + num_samples); + + /* Compute the maximum sample distance from the specified locations. */ + for (uint32_t i = 0; i < num_samples; i++) { + VkOffset2D offset = sample_locs[0][i]; + max_sample_dist = MAX2(max_sample_dist, + MAX2(abs(offset.x), abs(offset.y))); + } + + /* Emit the specified user sample locations. */ + switch (num_samples) { + case 2: + case 4: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]); + break; + case 8: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]); + radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]); + radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]); + radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]); + radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]); + break; + default: + unreachable("invalid number of samples"); + } + + /* Emit the maximum sample distance and the centroid priority. */ + uint32_t pa_sc_aa_config = ms->pa_sc_aa_config; + + pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST; + pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist); + + radeon_set_context_reg_seq(cs, R_028BE0_PA_SC_AA_CONFIG, 1); + radeon_emit(cs, pa_sc_aa_config); + + radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); + radeon_emit(cs, centroid_priority); + radeon_emit(cs, centroid_priority >> 32); + + /* GFX9: Flush DFSM when the AA mode changes. */ + if (cmd_buffer->device->dfsm_allowed) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); + } + + cmd_buffer->state.context_roll_without_scissor_emitted = true; +} + static void radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, @@ -1775,6 +1976,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer) if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE) radv_emit_discard_rectangle(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) + radv_emit_sample_locations(cmd_buffer); + cmd_buffer->state.dirty &= ~states; } @@ -3272,6 +3476,25 @@ void radv_CmdSetDiscardRectangleEXT( state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE; } +void radv_CmdSetSampleLocationsEXT( + VkCommandBuffer commandBuffer, + const VkSampleLocationsInfoEXT* pSampleLocationsInfo) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS); + + state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; + state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize; + state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount; + typed_memcpy(&state->dynamic.sample_location.locations[0], + pSampleLocationsInfo->pSampleLocations, + pSampleLocationsInfo->sampleLocationsCount); + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; +} + void radv_CmdExecuteCommands( VkCommandBuffer commandBuffer, uint32_t commandBufferCount, diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index c50908bcfcb..9e50038e22a 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1367,6 +1367,27 @@ void radv_GetPhysicalDeviceProperties2( props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { + VkPhysicalDeviceSampleLocationsPropertiesEXT *properties = + (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext; + /* TODO: The ext is currently disabled because the + * driver needs to handle sample locations during + * layout transitions for depth/stencil surfaces and + * HTILE. + */ + properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_1_BIT; + /* + properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | + VK_SAMPLE_COUNT_4_BIT | + VK_SAMPLE_COUNT_8_BIT; + */ + properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 }; + properties->sampleLocationCoordinateRange[0] = 0.0f; + properties->sampleLocationCoordinateRange[1] = 0.9375f; + properties->sampleLocationSubPixelBits = 4; + properties->variableSampleLocations = VK_FALSE; + break; + } default: break; } @@ -5368,3 +5389,17 @@ VkResult radv_GetCalibratedTimestampsEXT( return VK_SUCCESS; } + +void radv_GetPhysicalDeviceMultisamplePropertiesEXT( + VkPhysicalDevice physicalDevice, + VkSampleCountFlagBits samples, + VkMultisamplePropertiesEXT* pMultisampleProperties) +{ + if (samples & (VK_SAMPLE_COUNT_2_BIT | + VK_SAMPLE_COUNT_4_BIT | + VK_SAMPLE_COUNT_8_BIT)) { + pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 }; + } else { + pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 }; + } +} diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 0b5af56a435..d6e9d5c034b 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -119,6 +119,7 @@ EXTENSIONS = [ Extension('VK_EXT_memory_priority', 1, True), Extension('VK_EXT_pci_bus_info', 2, True), Extension('VK_EXT_pipeline_creation_feedback', 1, True), + Extension('VK_EXT_sample_locations', 1, False), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= GFX7'), Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= GFX7'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 161997ae196..b69dda39ce1 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -76,6 +76,13 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) return false; + /* TODO: Implement layout transitions with variable sample locations + * before enabling HTILE for depth/stencil images created with this + * flags because the depth decompress pass needs to know them. + */ + if (pCreateInfo->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) + return false; + if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) return false; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index a3076d421a5..ddcbe465883 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1267,6 +1267,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state) return RADV_DYNAMIC_STENCIL_REFERENCE; case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT: return RADV_DYNAMIC_DISCARD_RECTANGLE; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: + return RADV_DYNAMIC_SAMPLE_LOCATIONS; default: unreachable("Unhandled dynamic state"); } @@ -1297,6 +1299,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT)) states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE; + if (!pCreateInfo->pMultisampleState || + !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext, + PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT)) + states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS; + /* TODO: blend constants & line width. */ return states; @@ -1426,6 +1433,29 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } } + if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) { + const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info = + vk_find_struct_const(pCreateInfo->pMultisampleState->pNext, + PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); + /* If sampleLocationsEnable is VK_FALSE, the default sample + * locations are used and the values specified in + * sampleLocationsInfo are ignored. + */ + if (sample_location_info->sampleLocationsEnable) { + const VkSampleLocationsInfoEXT *pSampleLocationsInfo = + &sample_location_info->sampleLocationsInfo; + + assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS); + + dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; + dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize; + dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount; + typed_memcpy(&dynamic->sample_location.locations[0], + pSampleLocationsInfo->pSampleLocations, + pSampleLocationsInfo->sampleLocationsCount); + } + } + pipeline->dynamic_state.mask = states; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 08b2621685d..828edb56540 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -91,6 +91,7 @@ typedef uint32_t xcb_window_t; #define MAX_VIEWPORTS 16 #define MAX_SCISSORS 16 #define MAX_DISCARD_RECTANGLES 4 +#define MAX_SAMPLE_LOCATIONS 32 #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_PUSH_DESCRIPTORS 32 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16 @@ -852,7 +853,8 @@ enum radv_dynamic_state_bits { RADV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8, RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - RADV_DYNAMIC_ALL = (1 << 10) - 1, + RADV_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, + RADV_DYNAMIC_ALL = (1 << 11) - 1, }; enum radv_cmd_dirty_bits { @@ -868,12 +870,13 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 10) - 1, - RADV_CMD_DIRTY_PIPELINE = 1 << 10, - RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11, - RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12, - RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13, - RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 14, + RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, + RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 11) - 1, + RADV_CMD_DIRTY_PIPELINE = 1 << 11, + RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 12, + RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 13, + RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 14, + RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 15, }; enum radv_cmd_flush_bits { @@ -950,6 +953,13 @@ struct radv_discard_rectangle_state { VkRect2D rectangles[MAX_DISCARD_RECTANGLES]; }; +struct radv_sample_locations_state { + VkSampleCountFlagBits per_pixel; + VkExtent2D grid_size; + uint32_t count; + VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS]; +}; + struct radv_dynamic_state { /** * Bitmask of (1 << VK_DYNAMIC_STATE_*). @@ -992,6 +1002,8 @@ struct radv_dynamic_state { } stencil_reference; struct radv_discard_rectangle_state discard_rectangle; + + struct radv_sample_locations_state sample_location; }; extern const struct radv_dynamic_state default_dynamic_state; |