diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 73 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 37 |
4 files changed, 67 insertions, 50 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 31b7985fef9..9f79c2aa804 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -327,7 +327,6 @@ struct si_context { int last_vtx_reuse_depth; int current_rast_prim; /* primitive type after TES, GS */ bool gs_tri_strip_adj_fix; - unsigned last_gsvs_itemsize; /* Scratch buffer */ struct r600_resource *scratch_buffer; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 48ccd83b396..9b495925a6e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5817,6 +5817,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); @@ -5836,18 +5837,74 @@ static void preload_ring_buffers(struct si_shader_context *ctx) } if (ctx->shader->is_gs_copy_shader) { - LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS); + LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS); ctx->gsvs_ring[0] = build_indexed_load_const(ctx, buf_ptr, offset); - } - if (ctx->type == PIPE_SHADER_GEOMETRY) { - int i; - for (i = 0; i < 4; i++) { - LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i); + } else if (ctx->type == PIPE_SHADER_GEOMETRY) { + struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld; + LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS); + LLVMValueRef base_ring; + + base_ring = build_indexed_load_const(ctx, buf_ptr, offset); + + /* The conceptual layout of the GSVS ring is + * v0c0 .. vLv0 v0c1 .. vLc1 .. + * but the real memory layout is swizzled across + * threads: + * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL + * t16v0c0 .. + * Override the buffer descriptor accordingly. + */ + LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2); + unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size; + unsigned num_records; + + num_records = 64; + if (ctx->screen->b.chip_class >= VI) + num_records *= max_gsvs_emit_size; + + for (unsigned stream = 0; stream < 4; ++stream) { + LLVMValueRef ring, tmp; + + if (!ctx->shader->selector->info.num_stream_output_components[stream]) + continue; - ctx->gsvs_ring[i] = - build_indexed_load_const(ctx, buf_ptr, offset); + /* Limit on the stride field for <= CIK. */ + assert(max_gsvs_emit_size < (1 << 14)); + + ring = LLVMBuildBitCast(builder, base_ring, v2i64, ""); + tmp = LLVMBuildExtractElement(builder, ring, uint->zero, ""); + tmp = LLVMBuildAdd(builder, tmp, + LLVMConstInt(ctx->i64, + max_gsvs_emit_size * 64 * stream, 0), ""); + ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, ""); + ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, ""); + tmp = LLVMBuildExtractElement(builder, ring, uint->one, ""); + tmp = LLVMBuildOr(builder, tmp, + LLVMConstInt(ctx->i32, + S_008F04_STRIDE(max_gsvs_emit_size) | + S_008F04_SWIZZLE_ENABLE(1), 0), ""); + ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, ""); + ring = LLVMBuildInsertElement(builder, ring, + LLVMConstInt(ctx->i32, num_records, 0), + LLVMConstInt(ctx->i32, 2, 0), ""); + ring = LLVMBuildInsertElement(builder, ring, + LLVMConstInt(ctx->i32, + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */ + S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ + S_008F0C_ADD_TID_ENABLE(1), + 0), + LLVMConstInt(ctx->i32, 3, 0), ""); + ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, ""); + + ctx->gsvs_ring[stream] = ring; } } } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index d8e60249db2..a17dbc73102 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -167,11 +167,7 @@ enum { SI_ES_RING_ESGS, SI_GS_RING_ESGS, - SI_GS_RING_GSVS0, - SI_GS_RING_GSVS1, - SI_GS_RING_GSVS2, - SI_GS_RING_GSVS3, - SI_VS_RING_GSVS, + SI_RING_GSVS, SI_VS_STREAMOUT_BUF0, SI_VS_STREAMOUT_BUF1, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ea715695b97..1e9f5f0a217 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2039,47 +2039,14 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) false, false, 0, 0, 0); } if (sctx->gsvs_ring) { - si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS, + si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS, sctx->gsvs_ring, 0, sctx->gsvs_ring->width0, false, false, 0, 0, 0); - - /* Also update SI_GS_RING_GSVSi descriptors. */ - sctx->last_gsvs_itemsize = 0; } return true; } -static void si_update_gsvs_ring_bindings(struct si_context *sctx) -{ - unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size; - uint64_t offset; - - if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize) - return; - - sctx->last_gsvs_itemsize = gsvs_itemsize; - - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, 0); - - offset = gsvs_itemsize * 64; - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, offset); - - offset = (gsvs_itemsize * 2) * 64; - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, offset); - - offset = (gsvs_itemsize * 3) * 64; - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, offset); -} - /** * @returns 1 if \p sel has been updated to use a new scratch buffer * 0 if not @@ -2469,8 +2436,6 @@ bool si_update_shaders(struct si_context *sctx) if (!si_update_gs_ring_buffers(sctx)) return false; - - si_update_gsvs_ring_bindings(sctx); } else { si_pm4_bind_state(sctx, gs, NULL); si_pm4_bind_state(sctx, es, NULL); |