summaryrefslogtreecommitdiffstats
path: root/src/amd/vulkan
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2017-04-03 04:38:12 +0100
committerDave Airlie <[email protected]>2017-04-04 05:55:15 +1000
commit1171b304f30f77b6780891b2b0561b52234a1ec5 (patch)
tree3a3a50fd4390e6ceb8fc4005a5c29ea058a1020e /src/amd/vulkan
parent471c1bc7ccd33caa38bbf7124691ccf6884ac5f8 (diff)
radv: overhaul fragment shader sample positions.
The current code was broken, and I decided to redesign it instead. This puts the sample positions for all samples into the queue constant descriptor buffer after all the spill/ring descriptors. It then uses a single offset register to point how far into the samples the samples for num_samples are. This saves one user sgpr and means we only generate the sample position data in the rare single case where we need it currently. This doesn't fix the failing CTS tests without the followup fix. Reviewed-by: Bas Nieuwenhuizen <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r--src/amd/vulkan/radv_cmd_buffer.c61
-rw-r--r--src/amd/vulkan/radv_device.c40
-rw-r--r--src/amd/vulkan/radv_private.h2
3 files changed, 67 insertions, 36 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c95388e9fb2..b0a68548295 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -222,6 +222,7 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->esgs_ring_size_needed = 0;
cmd_buffer->gsvs_ring_size_needed = 0;
cmd_buffer->tess_rings_needed = false;
+ cmd_buffer->sample_positions_needed = false;
if (cmd_buffer->upload.upload_bo)
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
@@ -452,37 +453,35 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
- uint32_t samples_offset;
- void *samples_ptr;
- void *src;
- radv_cmd_buffer_upload_alloc(cmd_buffer, num_samples * 4 * 2, 256, &samples_offset,
- &samples_ptr);
- switch (num_samples) {
- case 1:
- src = cmd_buffer->device->sample_locations_1x;
- break;
- case 2:
- src = cmd_buffer->device->sample_locations_2x;
- break;
- case 4:
- src = cmd_buffer->device->sample_locations_4x;
- break;
- case 8:
- src = cmd_buffer->device->sample_locations_8x;
- break;
- case 16:
- src = cmd_buffer->device->sample_locations_16x;
- break;
- default:
- unreachable("unknown number of samples");
- }
- memcpy(samples_ptr, src, num_samples * 4 * 2);
-
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
- va += samples_offset;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_positions) {
+ uint32_t offset;
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
+ uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ if (loc->sgpr_idx == -1)
+ return;
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+ switch (num_samples) {
+ default:
+ offset = 0;
+ break;
+ case 2:
+ offset = 1;
+ break;
+ case 4:
+ offset = 3;
+ break;
+ case 8:
+ offset = 7;
+ break;
+ case 16:
+ offset = 15;
+ break;
+ }
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
- AC_UD_PS_SAMPLE_POS, va);
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset);
+ cmd_buffer->sample_positions_needed = true;
+ }
}
static void
@@ -2197,6 +2196,8 @@ void radv_CmdExecuteCommands(
primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
if (secondary->tess_rings_needed)
primary->tess_rings_needed = true;
+ if (secondary->sample_positions_needed)
+ primary->sample_positions_needed = true;
if (secondary->ring_offsets_idx != -1) {
if (primary->ring_offsets_idx == -1)
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 5c48be1d11a..24219e4ec4f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1188,6 +1188,7 @@ static void radv_dump_trace(struct radv_device *device,
static void
fill_geom_tess_rings(struct radv_queue *queue,
uint32_t *map,
+ bool add_sample_positions,
uint32_t esgs_ring_size,
struct radeon_winsys_bo *esgs_ring_bo,
uint32_t gsvs_ring_size,
@@ -1315,6 +1316,18 @@ fill_geom_tess_rings(struct radv_queue *queue,
S_008F0C_ELEMENT_SIZE(0) |
S_008F0C_INDEX_STRIDE(0) |
S_008F0C_ADD_TID_ENABLE(false);
+ desc += 4;
+
+ /* add sample positions after all rings */
+ memcpy(desc, queue->device->sample_locations_1x, 8);
+ desc += 2;
+ memcpy(desc, queue->device->sample_locations_2x, 16);
+ desc += 4;
+ memcpy(desc, queue->device->sample_locations_4x, 32);
+ desc += 8;
+ memcpy(desc, queue->device->sample_locations_8x, 64);
+ desc += 16;
+ memcpy(desc, queue->device->sample_locations_16x, 128);
}
static unsigned
@@ -1374,6 +1387,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
uint32_t esgs_ring_size,
uint32_t gsvs_ring_size,
bool needs_tess_rings,
+ bool needs_sample_positions,
struct radeon_winsys_cs **initial_preamble_cs,
struct radeon_winsys_cs **continue_preamble_cs)
{
@@ -1385,7 +1399,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
struct radeon_winsys_cs *dest_cs[2] = {0};
- bool add_tess_rings = false;
+ bool add_tess_rings = false, add_sample_positions = false;
unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
unsigned max_offchip_buffers;
unsigned hs_offchip_param = 0;
@@ -1393,6 +1407,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
if (needs_tess_rings)
add_tess_rings = true;
}
+ if (!queue->has_sample_positions) {
+ if (needs_sample_positions)
+ add_sample_positions = true;
+ }
tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
hs_offchip_param = radv_get_hs_offchip_param(queue->device,
&max_offchip_buffers);
@@ -1403,7 +1421,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
compute_scratch_size <= queue->compute_scratch_size &&
esgs_ring_size <= queue->esgs_ring_size &&
gsvs_ring_size <= queue->gsvs_ring_size &&
- !add_tess_rings &&
+ !add_tess_rings && !add_sample_positions &&
queue->initial_preamble_cs) {
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
@@ -1485,11 +1503,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
esgs_ring_bo != queue->esgs_ring_bo ||
gsvs_ring_bo != queue->gsvs_ring_bo ||
tess_factor_ring_bo != queue->tess_factor_ring_bo ||
- tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
+ tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
uint32_t size = 0;
if (gsvs_ring_bo || esgs_ring_bo ||
- tess_factor_ring_bo || tess_offchip_ring_bo)
+ tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
+ if (add_sample_positions)
+ size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
+ }
else if (scratch_bo)
size = 8; /* 2 dword */
@@ -1541,8 +1562,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
map[1] = rsrc1;
}
- if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo)
- fill_geom_tess_rings(queue, map,
+ if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
+ add_sample_positions)
+ fill_geom_tess_rings(queue, map, add_sample_positions,
esgs_ring_size, esgs_ring_bo,
gsvs_ring_size, gsvs_ring_bo,
tess_factor_ring_size, tess_factor_ring_bo,
@@ -1685,6 +1707,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->descriptor_bo = descriptor_bo;
}
+ if (add_sample_positions)
+ queue->has_sample_positions = true;
+
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1730,6 +1755,7 @@ VkResult radv_QueueSubmit(
VkResult result;
bool fence_emitted = false;
bool tess_rings_needed = false;
+ bool sample_positions_needed = false;
/* Do this first so failing to allocate scratch buffers can't result in
* partially executed submissions. */
@@ -1744,11 +1770,13 @@ VkResult radv_QueueSubmit(
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
tess_rings_needed |= cmd_buffer->tess_rings_needed;
+ sample_positions_needed |= cmd_buffer->sample_positions_needed;
}
}
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
+ sample_positions_needed,
&initial_preamble_cs, &continue_preamble_cs);
if (result != VK_SUCCESS)
return result;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3f92d59ac97..a28c8250367 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -460,6 +460,7 @@ struct radv_queue {
uint32_t esgs_ring_size;
uint32_t gsvs_ring_size;
bool has_tess_rings;
+ bool has_sample_positions;
struct radeon_winsys_bo *scratch_bo;
struct radeon_winsys_bo *descriptor_bo;
@@ -748,6 +749,7 @@ struct radv_cmd_buffer {
uint32_t esgs_ring_size_needed;
uint32_t gsvs_ring_size_needed;
bool tess_rings_needed;
+ bool sample_positions_needed;
int ring_offsets_idx; /* just used for verification */
};