summaryrefslogtreecommitdiffstats
path: root/src/amd/vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r--src/amd/vulkan/radv_cmd_buffer.c61
-rw-r--r--src/amd/vulkan/radv_device.c40
-rw-r--r--src/amd/vulkan/radv_private.h2
3 files changed, 67 insertions, 36 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c95388e9fb2..b0a68548295 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -222,6 +222,7 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->esgs_ring_size_needed = 0;
cmd_buffer->gsvs_ring_size_needed = 0;
cmd_buffer->tess_rings_needed = false;
+ cmd_buffer->sample_positions_needed = false;
if (cmd_buffer->upload.upload_bo)
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
@@ -452,37 +453,35 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
- uint32_t samples_offset;
- void *samples_ptr;
- void *src;
- radv_cmd_buffer_upload_alloc(cmd_buffer, num_samples * 4 * 2, 256, &samples_offset,
- &samples_ptr);
- switch (num_samples) {
- case 1:
- src = cmd_buffer->device->sample_locations_1x;
- break;
- case 2:
- src = cmd_buffer->device->sample_locations_2x;
- break;
- case 4:
- src = cmd_buffer->device->sample_locations_4x;
- break;
- case 8:
- src = cmd_buffer->device->sample_locations_8x;
- break;
- case 16:
- src = cmd_buffer->device->sample_locations_16x;
- break;
- default:
- unreachable("unknown number of samples");
- }
- memcpy(samples_ptr, src, num_samples * 4 * 2);
-
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
- va += samples_offset;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_positions) {
+ uint32_t offset;
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
+ uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ if (loc->sgpr_idx == -1)
+ return;
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+ switch (num_samples) {
+ default:
+ offset = 0;
+ break;
+ case 2:
+ offset = 1;
+ break;
+ case 4:
+ offset = 3;
+ break;
+ case 8:
+ offset = 7;
+ break;
+ case 16:
+ offset = 15;
+ break;
+ }
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
- AC_UD_PS_SAMPLE_POS, va);
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset);
+ cmd_buffer->sample_positions_needed = true;
+ }
}
static void
@@ -2197,6 +2196,8 @@ void radv_CmdExecuteCommands(
primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
if (secondary->tess_rings_needed)
primary->tess_rings_needed = true;
+ if (secondary->sample_positions_needed)
+ primary->sample_positions_needed = true;
if (secondary->ring_offsets_idx != -1) {
if (primary->ring_offsets_idx == -1)
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 5c48be1d11a..24219e4ec4f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1188,6 +1188,7 @@ static void radv_dump_trace(struct radv_device *device,
static void
fill_geom_tess_rings(struct radv_queue *queue,
uint32_t *map,
+ bool add_sample_positions,
uint32_t esgs_ring_size,
struct radeon_winsys_bo *esgs_ring_bo,
uint32_t gsvs_ring_size,
@@ -1315,6 +1316,18 @@ fill_geom_tess_rings(struct radv_queue *queue,
S_008F0C_ELEMENT_SIZE(0) |
S_008F0C_INDEX_STRIDE(0) |
S_008F0C_ADD_TID_ENABLE(false);
+ desc += 4;
+
+ /* add sample positions after all rings */
+ memcpy(desc, queue->device->sample_locations_1x, 8);
+ desc += 2;
+ memcpy(desc, queue->device->sample_locations_2x, 16);
+ desc += 4;
+ memcpy(desc, queue->device->sample_locations_4x, 32);
+ desc += 8;
+ memcpy(desc, queue->device->sample_locations_8x, 64);
+ desc += 16;
+ memcpy(desc, queue->device->sample_locations_16x, 128);
}
static unsigned
@@ -1374,6 +1387,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
uint32_t esgs_ring_size,
uint32_t gsvs_ring_size,
bool needs_tess_rings,
+ bool needs_sample_positions,
struct radeon_winsys_cs **initial_preamble_cs,
struct radeon_winsys_cs **continue_preamble_cs)
{
@@ -1385,7 +1399,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
struct radeon_winsys_cs *dest_cs[2] = {0};
- bool add_tess_rings = false;
+ bool add_tess_rings = false, add_sample_positions = false;
unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
unsigned max_offchip_buffers;
unsigned hs_offchip_param = 0;
@@ -1393,6 +1407,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
if (needs_tess_rings)
add_tess_rings = true;
}
+ if (!queue->has_sample_positions) {
+ if (needs_sample_positions)
+ add_sample_positions = true;
+ }
tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
hs_offchip_param = radv_get_hs_offchip_param(queue->device,
&max_offchip_buffers);
@@ -1403,7 +1421,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
compute_scratch_size <= queue->compute_scratch_size &&
esgs_ring_size <= queue->esgs_ring_size &&
gsvs_ring_size <= queue->gsvs_ring_size &&
- !add_tess_rings &&
+ !add_tess_rings && !add_sample_positions &&
queue->initial_preamble_cs) {
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
@@ -1485,11 +1503,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
esgs_ring_bo != queue->esgs_ring_bo ||
gsvs_ring_bo != queue->gsvs_ring_bo ||
tess_factor_ring_bo != queue->tess_factor_ring_bo ||
- tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
+ tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
uint32_t size = 0;
if (gsvs_ring_bo || esgs_ring_bo ||
- tess_factor_ring_bo || tess_offchip_ring_bo)
+ tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
+ if (add_sample_positions)
+ size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
+ }
else if (scratch_bo)
size = 8; /* 2 dword */
@@ -1541,8 +1562,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
map[1] = rsrc1;
}
- if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo)
- fill_geom_tess_rings(queue, map,
+ if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
+ add_sample_positions)
+ fill_geom_tess_rings(queue, map, add_sample_positions,
esgs_ring_size, esgs_ring_bo,
gsvs_ring_size, gsvs_ring_bo,
tess_factor_ring_size, tess_factor_ring_bo,
@@ -1685,6 +1707,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->descriptor_bo = descriptor_bo;
}
+ if (add_sample_positions)
+ queue->has_sample_positions = true;
+
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1730,6 +1755,7 @@ VkResult radv_QueueSubmit(
VkResult result;
bool fence_emitted = false;
bool tess_rings_needed = false;
+ bool sample_positions_needed = false;
/* Do this first so failing to allocate scratch buffers can't result in
* partially executed submissions. */
@@ -1744,11 +1770,13 @@ VkResult radv_QueueSubmit(
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
tess_rings_needed |= cmd_buffer->tess_rings_needed;
+ sample_positions_needed |= cmd_buffer->sample_positions_needed;
}
}
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
+ sample_positions_needed,
&initial_preamble_cs, &continue_preamble_cs);
if (result != VK_SUCCESS)
return result;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3f92d59ac97..a28c8250367 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -460,6 +460,7 @@ struct radv_queue {
uint32_t esgs_ring_size;
uint32_t gsvs_ring_size;
bool has_tess_rings;
+ bool has_sample_positions;
struct radeon_winsys_bo *scratch_bo;
struct radeon_winsys_bo *descriptor_bo;
@@ -748,6 +749,7 @@ struct radv_cmd_buffer {
uint32_t esgs_ring_size_needed;
uint32_t gsvs_ring_size_needed;
bool tess_rings_needed;
+ bool sample_positions_needed;
int ring_offsets_idx; /* just used for verification */
};