about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bas Nieuwenhuizen <[email protected]> 2017-10-20 01:27:12 +0200
committer Dave Airlie <[email protected]> 2017-10-20 06:23:18 +0100
commit ec53e527421d475bb813654ee39fccef64c5ad4e (patch)
tree bb522a15c6f88dcabbbdbddbbe746ff01a413d48
parent 3e7733303041b6ea18fcb3493796fd5703b38411 (diff)
ac/nir: Add ES output to LDS for GFX9.
Reviewed-by: Dave Airlie <[email protected]>
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 57
1 files changed, 49 insertions, 8 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index de9510c9a40..d7853677fed 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5854,8 +5854,9 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
{
int j;
uint64_t max_output_written = 0;
+ LLVMValueRef lds_base = NULL;
+
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
int param_index;
int length = 4;
@@ -5868,20 +5869,60 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
param_index = shader_io_get_unique_index(i);
max_output_written = MAX2(param_index + (length > 4), max_output_written);
+ }
+
+ outinfo->esgs_itemsize = (max_output_written + 1) * 16;
+
+ if (ctx->ac.chip_class >= GFX9) {
+ unsigned itemsize_dw = outinfo->esgs_itemsize / 4;
+ LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
+ LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
+ LLVMConstInt(ctx->ac.i32, 24, false),
+ LLVMConstInt(ctx->ac.i32, 4, false), false);
+ vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
+ LLVMBuildMul(ctx->ac.builder, wave_idx,
+ LLVMConstInt(ctx->i32, 64, false), ""), "");
+ lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
+ LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
+ }
+
+ for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef dw_addr;
+ LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
+ int param_index;
+ int length = 4;
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = ctx->num_output_clips + ctx->num_output_culls;
+
+ param_index = shader_io_get_unique_index(i);
+
+ if (lds_base) {
+ dw_addr = LLVMBuildAdd(ctx->builder, lds_base,
+ LLVMConstInt(ctx->i32, param_index * 4, false),
+ "");
+ }
for (j = 0; j < length; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
- ac_build_buffer_store_dword(&ctx->ac,
- ctx->esgs_ring,
- out_val, 1,
- NULL, ctx->es2gs_offset,
- (4 * param_index + j) * 4,
- 1, 1, true, true);
+ if (ctx->ac.chip_class >= GFX9) {
+ lds_store(ctx, dw_addr,
+ LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
+ dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
+ } else {
+ ac_build_buffer_store_dword(&ctx->ac,
+ ctx->esgs_ring,
+ out_val, 1,
+ NULL, ctx->es2gs_offset,
+ (4 * param_index + j) * 4,
+ 1, 1, true, true);
+ }
}
}
- outinfo->esgs_itemsize = (max_output_written + 1) * 16;
}
static void