diff options
author | Samuel Pitoiset <[email protected]> | 2019-09-17 10:43:15 +0200 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2019-10-02 18:09:27 +0200 |
commit | 60f8224171129e4f36b1155d0d30c5dea3f864b3 (patch) | |
tree | 81603607d4255fbb20a6a71cdbec16b9bee46da9 | |
parent | 56e1b1ff0cea2075ee554173fd319306668b340b (diff) |
radv/gfx10: fix storing/loading NGG stream outputs for VS and TES
The LDS storage allocated for stream outputs holds 4 * N components,
where N is the number of stream outputs. So, we have to store/load using
the index of the output in the stream output list (0..N-1) and not the
output location as index.
This doesn't fix any known issue, but it should prevent out-of-bounds
accesses, and it also reduces the number of outputs written to the
LDS storage.
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 18 |
1 file changed, 10 insertions, 8 deletions
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 313a5a6bc1d..8339bc02326 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3202,12 +3202,11 @@ static void build_streamout_vertex(struct radv_shader_context *ctx,
 		if (stream != output->stream)
 			continue;
 
-		unsigned loc = output->location;
 		struct radv_shader_output_values out = {};
 
 		for (unsigned comp = 0; comp < 4; comp++) {
 			tmp = ac_build_gep0(&ctx->ac, vertexptr,
-					    LLVMConstInt(ctx->ac.i32, 4 * loc + comp, false));
+					    LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
 			out.values[comp] = LLVMBuildLoad(builder, tmp, "");
 		}
 
@@ -3576,6 +3575,7 @@ static LLVMValueRef ngg_nogs_vertex_ptr(struct radv_shader_context *ctx,
 static void
 handle_ngg_outputs_post_1(struct radv_shader_context *ctx)
 {
+	struct radv_streamout_info *so = &ctx->shader_info->so;
 	LLVMBuilderRef builder = ctx->ac.builder;
 	LLVMValueRef vertex_ptr = NULL;
 	LLVMValueRef tmp, tmp2;
@@ -3588,15 +3588,17 @@ handle_ngg_outputs_post_1(struct radv_shader_context *ctx)
 
 	vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
 
-	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
-		if (!(ctx->output_mask & (1ull << i)))
-			continue;
+	for (unsigned i = 0; i < so->num_outputs; ++i) {
+		struct radv_stream_output *output =
+			&ctx->shader_info->so.outputs[i];
 
-		for (unsigned j = 0; j < 4; j++) {
+		unsigned loc = output->location;
+
+		for (unsigned comp = 0; comp < 4; comp++) {
 			tmp = ac_build_gep0(&ctx->ac, vertex_ptr,
-				LLVMConstInt(ctx->ac.i32, 4 * i + j, false));
+				LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
 			tmp2 = LLVMBuildLoad(builder,
-					     ctx->abi.outputs[4 * i + j], "");
+					     ctx->abi.outputs[4 * loc + comp], "");
 			tmp2 = ac_to_integer(&ctx->ac, tmp2);
 			LLVMBuildStore(builder, tmp2, tmp);
 		}