diff options
author | Connor Abbott <[email protected]> | 2019-05-29 15:48:06 +0200 |
---|---|---|
committer | Connor Abbott <[email protected]> | 2019-05-31 11:02:11 +0200 |
commit | 6571032af1d2c00150c4a6699a5fc385dd174ab9 (patch) | |
tree | 0606ec704e3ca84555d64a433a8029d9aea0dcfa | |
parent | ca19f7639a0fc1da1e8bc6bb638495af1a6a5798 (diff) |
radeonsi/nir: Correctly handle double TCS/TES varyings
ac expands the store to 32-bit components for us, but we still have to
deal with storing up to 8 components. When a varying is split across
two vec4 slots, we have to calculate the address again for the second
slot, since the two slots aren't adjacent in memory. I didn't do this at
the ac level because we should generate better indexing arithmetic for
the LDS store, where slots are contiguous.
Reviewed-by: Timothy Arceri <[email protected]>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 32 |
1 files changed, 28 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d2927d0254b..5bd65e0f65c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1253,8 +1253,20 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMValueRef value[4]; for (unsigned i = 0; i < num_components; i++) { unsigned offset = i; - if (llvm_type_is_64bit(ctx, type)) + if (llvm_type_is_64bit(ctx, type)) { offset *= 2; + if (offset == 4) { + addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, + vertex_index, + param_index, + driver_location + 1, + info->input_semantic_name, + info->input_semantic_index, + is_patch); + } + + offset = offset % 4; + } offset += component; value[i + component] = buffer_load(&ctx->bld_base, type, offset, @@ -1376,7 +1388,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, unsigned driver_location = var->data.driver_location; LLVMValueRef dw_addr, stride; LLVMValueRef buffer, base, addr; - LLVMValueRef values[4]; + LLVMValueRef values[8]; bool skip_lds_store; bool is_tess_factor = false, is_tess_inner = false; @@ -1438,11 +1450,22 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, info->output_semantic_index, is_patch); - for (unsigned chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); + unsigned buffer_store_offset = chan % 4; + if (chan == 4) { + addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, + vertex_index, + param_index, + driver_location + 1, + info->output_semantic_name, + info->output_semantic_index, + is_patch); + } + /* Skip LDS stores if there is no LDS read of this output. 
*/ if (!skip_lds_store) lds_store(ctx, chan, dw_addr, value); @@ -1453,7 +1476,8 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, if (writemask != 0xF && !is_tess_factor) { ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base, - 4 * chan, 1, 0, true, false); + 4 * buffer_store_offset, + 1, 0, true, false); } /* Write tess factors into VGPRs for the epilog. */ |