summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/common/ac_nir_to_llvm.c31
-rw-r--r--src/amd/common/ac_nir_to_llvm.h4
-rw-r--r--src/amd/vulkan/radv_cmd_buffer.c61
-rw-r--r--src/amd/vulkan/radv_device.c40
-rw-r--r--src/amd/vulkan/radv_private.h2
5 files changed, 87 insertions, 51 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c8ea0da7740..dfe2732f78d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -109,7 +109,7 @@ struct nir_to_llvm_context {
LLVMValueRef hs_ring_tess_factor;
LLVMValueRef prim_mask;
- LLVMValueRef sample_positions;
+ LLVMValueRef sample_pos_offset;
LLVMValueRef persp_sample, persp_center, persp_centroid;
LLVMValueRef linear_sample, linear_center, linear_centroid;
LLVMValueRef front_face;
@@ -573,6 +573,7 @@ static void create_function(struct nir_to_llvm_context *ctx)
ctx->stage == MESA_SHADER_VERTEX ||
ctx->stage == MESA_SHADER_TESS_CTRL ||
ctx->stage == MESA_SHADER_TESS_EVAL ||
+ ctx->stage == MESA_SHADER_FRAGMENT ||
ctx->is_gs_copy_shader)
need_ring_offsets = true;
@@ -584,7 +585,7 @@ static void create_function(struct nir_to_llvm_context *ctx)
need_push_constants = false;
if (need_ring_offsets && !ctx->options->supports_spill) {
- arg_types[arg_idx++] = const_array(ctx->v16i8, 8); /* address of rings */
+ arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* address of rings */
}
/* 1 for each descriptor set */
@@ -679,7 +680,7 @@ static void create_function(struct nir_to_llvm_context *ctx)
arg_types[arg_idx++] = ctx->i32; // GS instance id
break;
case MESA_SHADER_FRAGMENT:
- arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
+ arg_types[arg_idx++] = ctx->i32; /* sample position offset */
user_sgpr_count = arg_idx;
arg_types[arg_idx++] = ctx->i32; /* prim mask */
sgpr_count = arg_idx;
@@ -735,7 +736,7 @@ static void create_function(struct nir_to_llvm_context *ctx)
LLVMPointerType(ctx->i8, CONST_ADDR_SPACE),
NULL, 0, AC_FUNC_ATTR_READNONE);
ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
- const_array(ctx->v16i8, 8), "");
+ const_array(ctx->v16i8, 16), "");
} else
ctx->ring_offsets = LLVMGetParam(ctx->main_function, arg_idx++);
}
@@ -844,9 +845,9 @@ static void create_function(struct nir_to_llvm_context *ctx)
ctx->gs_invocation_id = LLVMGetParam(ctx->main_function, arg_idx++);
break;
case MESA_SHADER_FRAGMENT:
- set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
- user_sgpr_idx += 2;
- ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
+ set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, user_sgpr_idx, 1);
+ user_sgpr_idx += 1;
+ ctx->sample_pos_offset = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
@@ -3518,15 +3519,17 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
LLVMValueRef sample_id)
{
- /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
- LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
- LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
- LLVMValueRef result[2];
+ LLVMValueRef result;
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false));
- result[0] = ac_build_indexed_load_const(&ctx->ac, ctx->sample_positions, offset0);
- result[1] = ac_build_indexed_load_const(&ctx->ac, ctx->sample_positions, offset1);
+ ptr = LLVMBuildBitCast(ctx->builder, ptr,
+ const_array(ctx->v2f32, 64), "");
- return ac_build_gather_values(&ctx->ac, result, 2);
+ sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
+ result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
+
+ ctx->shader_info->fs.uses_sample_positions = true;
+ return result;
}
static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index c468d93f428..3d0b456f9f5 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -88,7 +88,7 @@ enum ac_ud_index {
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
AC_UD_VS_LS_TCS_IN_LAYOUT,
AC_UD_VS_MAX_UD,
- AC_UD_PS_SAMPLE_POS = AC_UD_SHADER_START,
+ AC_UD_PS_SAMPLE_POS_OFFSET = AC_UD_SHADER_START,
AC_UD_PS_MAX_UD,
AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
AC_UD_CS_MAX_UD,
@@ -109,6 +109,7 @@ enum ac_ud_index {
#define RING_GSVS_GS 4
#define RING_HS_TESS_FACTOR 5
#define RING_HS_TESS_OFFCHIP 6
+#define RING_PS_SAMPLE_POSITIONS 7
// Match MAX_SETS from radv_descriptor_set.h
#define AC_UD_MAX_SETS MAX_SETS
@@ -165,6 +166,7 @@ struct ac_shader_variant_info {
bool force_persample;
bool prim_id_input;
bool layer_input;
+ bool uses_sample_positions;
} fs;
struct {
unsigned block_size[3];
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c95388e9fb2..b0a68548295 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -222,6 +222,7 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->esgs_ring_size_needed = 0;
cmd_buffer->gsvs_ring_size_needed = 0;
cmd_buffer->tess_rings_needed = false;
+ cmd_buffer->sample_positions_needed = false;
if (cmd_buffer->upload.upload_bo)
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
@@ -452,37 +453,35 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
- uint32_t samples_offset;
- void *samples_ptr;
- void *src;
- radv_cmd_buffer_upload_alloc(cmd_buffer, num_samples * 4 * 2, 256, &samples_offset,
- &samples_ptr);
- switch (num_samples) {
- case 1:
- src = cmd_buffer->device->sample_locations_1x;
- break;
- case 2:
- src = cmd_buffer->device->sample_locations_2x;
- break;
- case 4:
- src = cmd_buffer->device->sample_locations_4x;
- break;
- case 8:
- src = cmd_buffer->device->sample_locations_8x;
- break;
- case 16:
- src = cmd_buffer->device->sample_locations_16x;
- break;
- default:
- unreachable("unknown number of samples");
- }
- memcpy(samples_ptr, src, num_samples * 4 * 2);
-
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
- va += samples_offset;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_positions) {
+ uint32_t offset;
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
+ uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ if (loc->sgpr_idx == -1)
+ return;
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+ switch (num_samples) {
+ default:
+ offset = 0;
+ break;
+ case 2:
+ offset = 1;
+ break;
+ case 4:
+ offset = 3;
+ break;
+ case 8:
+ offset = 7;
+ break;
+ case 16:
+ offset = 15;
+ break;
+ }
- radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
- AC_UD_PS_SAMPLE_POS, va);
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, offset);
+ cmd_buffer->sample_positions_needed = true;
+ }
}
static void
@@ -2197,6 +2196,8 @@ void radv_CmdExecuteCommands(
primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
if (secondary->tess_rings_needed)
primary->tess_rings_needed = true;
+ if (secondary->sample_positions_needed)
+ primary->sample_positions_needed = true;
if (secondary->ring_offsets_idx != -1) {
if (primary->ring_offsets_idx == -1)
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 5c48be1d11a..24219e4ec4f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1188,6 +1188,7 @@ static void radv_dump_trace(struct radv_device *device,
static void
fill_geom_tess_rings(struct radv_queue *queue,
uint32_t *map,
+ bool add_sample_positions,
uint32_t esgs_ring_size,
struct radeon_winsys_bo *esgs_ring_bo,
uint32_t gsvs_ring_size,
@@ -1315,6 +1316,18 @@ fill_geom_tess_rings(struct radv_queue *queue,
S_008F0C_ELEMENT_SIZE(0) |
S_008F0C_INDEX_STRIDE(0) |
S_008F0C_ADD_TID_ENABLE(false);
+ desc += 4;
+
+ /* add sample positions after all rings */
+ memcpy(desc, queue->device->sample_locations_1x, 8);
+ desc += 2;
+ memcpy(desc, queue->device->sample_locations_2x, 16);
+ desc += 4;
+ memcpy(desc, queue->device->sample_locations_4x, 32);
+ desc += 8;
+ memcpy(desc, queue->device->sample_locations_8x, 64);
+ desc += 16;
+ memcpy(desc, queue->device->sample_locations_16x, 128);
}
static unsigned
@@ -1374,6 +1387,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
uint32_t esgs_ring_size,
uint32_t gsvs_ring_size,
bool needs_tess_rings,
+ bool needs_sample_positions,
struct radeon_winsys_cs **initial_preamble_cs,
struct radeon_winsys_cs **continue_preamble_cs)
{
@@ -1385,7 +1399,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
struct radeon_winsys_cs *dest_cs[2] = {0};
- bool add_tess_rings = false;
+ bool add_tess_rings = false, add_sample_positions = false;
unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
unsigned max_offchip_buffers;
unsigned hs_offchip_param = 0;
@@ -1393,6 +1407,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
if (needs_tess_rings)
add_tess_rings = true;
}
+ if (!queue->has_sample_positions) {
+ if (needs_sample_positions)
+ add_sample_positions = true;
+ }
tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
hs_offchip_param = radv_get_hs_offchip_param(queue->device,
&max_offchip_buffers);
@@ -1403,7 +1421,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
compute_scratch_size <= queue->compute_scratch_size &&
esgs_ring_size <= queue->esgs_ring_size &&
gsvs_ring_size <= queue->gsvs_ring_size &&
- !add_tess_rings &&
+ !add_tess_rings && !add_sample_positions &&
queue->initial_preamble_cs) {
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
@@ -1485,11 +1503,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
esgs_ring_bo != queue->esgs_ring_bo ||
gsvs_ring_bo != queue->gsvs_ring_bo ||
tess_factor_ring_bo != queue->tess_factor_ring_bo ||
- tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
+ tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
uint32_t size = 0;
if (gsvs_ring_bo || esgs_ring_bo ||
- tess_factor_ring_bo || tess_offchip_ring_bo)
+ tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
+ if (add_sample_positions)
+ size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
+ }
else if (scratch_bo)
size = 8; /* 2 dword */
@@ -1541,8 +1562,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
map[1] = rsrc1;
}
- if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo)
- fill_geom_tess_rings(queue, map,
+ if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
+ add_sample_positions)
+ fill_geom_tess_rings(queue, map, add_sample_positions,
esgs_ring_size, esgs_ring_bo,
gsvs_ring_size, gsvs_ring_bo,
tess_factor_ring_size, tess_factor_ring_bo,
@@ -1685,6 +1707,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->descriptor_bo = descriptor_bo;
}
+ if (add_sample_positions)
+ queue->has_sample_positions = true;
+
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1730,6 +1755,7 @@ VkResult radv_QueueSubmit(
VkResult result;
bool fence_emitted = false;
bool tess_rings_needed = false;
+ bool sample_positions_needed = false;
/* Do this first so failing to allocate scratch buffers can't result in
* partially executed submissions. */
@@ -1744,11 +1770,13 @@ VkResult radv_QueueSubmit(
esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
tess_rings_needed |= cmd_buffer->tess_rings_needed;
+ sample_positions_needed |= cmd_buffer->sample_positions_needed;
}
}
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
+ sample_positions_needed,
&initial_preamble_cs, &continue_preamble_cs);
if (result != VK_SUCCESS)
return result;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3f92d59ac97..a28c8250367 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -460,6 +460,7 @@ struct radv_queue {
uint32_t esgs_ring_size;
uint32_t gsvs_ring_size;
bool has_tess_rings;
+ bool has_sample_positions;
struct radeon_winsys_bo *scratch_bo;
struct radeon_winsys_bo *descriptor_bo;
@@ -748,6 +749,7 @@ struct radv_cmd_buffer {
uint32_t esgs_ring_size_needed;
uint32_t gsvs_ring_size_needed;
bool tess_rings_needed;
+ bool sample_positions_needed;
int ring_offsets_idx; /* just used for verification */
};