summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c73
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h6
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c37
4 files changed, 67 insertions, 50 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 31b7985fef9..9f79c2aa804 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -327,7 +327,6 @@ struct si_context {
int last_vtx_reuse_depth;
int current_rast_prim; /* primitive type after TES, GS */
bool gs_tri_strip_adj_fix;
- unsigned last_gsvs_itemsize;
/* Scratch buffer */
struct r600_resource *scratch_buffer;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 48ccd83b396..9b495925a6e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5817,6 +5817,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm =
ctx->soa.bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
SI_PARAM_RW_BUFFERS);
@@ -5836,18 +5837,74 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
}
if (ctx->shader->is_gs_copy_shader) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS);
+ LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
ctx->gsvs_ring[0] =
build_indexed_load_const(ctx, buf_ptr, offset);
- }
- if (ctx->type == PIPE_SHADER_GEOMETRY) {
- int i;
- for (i = 0; i < 4; i++) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i);
+ } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+ struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
+ LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
+ LLVMValueRef base_ring;
+
+ base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
+
+ /* The conceptual layout of the GSVS ring is
+ * v0c0 .. vLv0 v0c1 .. vLc1 ..
+ * but the real memory layout is swizzled across
+ * threads:
+ * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+ * t16v0c0 ..
+ * Override the buffer descriptor accordingly.
+ */
+ LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
+ unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size;
+ unsigned num_records;
+
+ num_records = 64;
+ if (ctx->screen->b.chip_class >= VI)
+ num_records *= max_gsvs_emit_size;
+
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ LLVMValueRef ring, tmp;
+
+ if (!ctx->shader->selector->info.num_stream_output_components[stream])
+ continue;
- ctx->gsvs_ring[i] =
- build_indexed_load_const(ctx, buf_ptr, offset);
+ /* Limit on the stride field for <= CIK. */
+ assert(max_gsvs_emit_size < (1 << 14));
+
+ ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
+ tmp = LLVMBuildExtractElement(builder, ring, uint->zero, "");
+ tmp = LLVMBuildAdd(builder, tmp,
+ LLVMConstInt(ctx->i64,
+ max_gsvs_emit_size * 64 * stream, 0), "");
+ ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, "");
+ ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
+ tmp = LLVMBuildExtractElement(builder, ring, uint->one, "");
+ tmp = LLVMBuildOr(builder, tmp,
+ LLVMConstInt(ctx->i32,
+ S_008F04_STRIDE(max_gsvs_emit_size) |
+ S_008F04_SWIZZLE_ENABLE(1), 0), "");
+ ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, "");
+ ring = LLVMBuildInsertElement(builder, ring,
+ LLVMConstInt(ctx->i32, num_records, 0),
+ LLVMConstInt(ctx->i32, 2, 0), "");
+ ring = LLVMBuildInsertElement(builder, ring,
+ LLVMConstInt(ctx->i32,
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
+ S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
+ S_008F0C_ADD_TID_ENABLE(1),
+ 0),
+ LLVMConstInt(ctx->i32, 3, 0), "");
+ ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
+
+ ctx->gsvs_ring[stream] = ring;
}
}
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index d8e60249db2..a17dbc73102 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -167,11 +167,7 @@ enum {
SI_ES_RING_ESGS,
SI_GS_RING_ESGS,
- SI_GS_RING_GSVS0,
- SI_GS_RING_GSVS1,
- SI_GS_RING_GSVS2,
- SI_GS_RING_GSVS3,
- SI_VS_RING_GSVS,
+ SI_RING_GSVS,
SI_VS_STREAMOUT_BUF0,
SI_VS_STREAMOUT_BUF1,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ea715695b97..1e9f5f0a217 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2039,47 +2039,14 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
false, false, 0, 0, 0);
}
if (sctx->gsvs_ring) {
- si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
+ si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS,
sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
false, false, 0, 0, 0);
-
- /* Also update SI_GS_RING_GSVSi descriptors. */
- sctx->last_gsvs_itemsize = 0;
}
return true;
}
-static void si_update_gsvs_ring_bindings(struct si_context *sctx)
-{
- unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
- uint64_t offset;
-
- if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
- return;
-
- sctx->last_gsvs_itemsize = gsvs_itemsize;
-
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, 0);
-
- offset = gsvs_itemsize * 64;
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, offset);
-
- offset = (gsvs_itemsize * 2) * 64;
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, offset);
-
- offset = (gsvs_itemsize * 3) * 64;
- si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3,
- sctx->gsvs_ring, gsvs_itemsize,
- 64, true, true, 4, 16, offset);
-}
-
/**
* @returns 1 if \p sel has been updated to use a new scratch buffer
* 0 if not
@@ -2469,8 +2436,6 @@ bool si_update_shaders(struct si_context *sctx)
if (!si_update_gs_ring_buffers(sctx))
return false;
-
- si_update_gsvs_ring_bindings(sctx);
} else {
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);