diff options
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_llvm.c | 32 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_llvm.h | 1 |
5 files changed, 39 insertions, 1 deletion
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 70dc94af2ef..0dc3ffdaa38 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -161,4 +161,6 @@ void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0 output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); + output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); + output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0a6f63ff9c0..3aaea4a73e9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2967,4 +2967,6 @@ void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t wor output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); output->inst = R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); + output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); + output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); } diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 9e31171c711..a9f5825c349 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -240,11 +240,43 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_context * base = &bld_base->base; + struct pipe_stream_output_info * so = ctx->stream_outputs; unsigned i; unsigned color_count = 0; boolean has_color = false; + if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) { + for (i = 
0; i < so->num_outputs; i++) { + unsigned register_index = so->output[i].register_index; + unsigned start_component = so->output[i].start_component; + unsigned num_components = so->output[i].num_components; + unsigned dst_offset = so->output[i].dst_offset; + unsigned chan; + LLVMValueRef elements[4]; + if (dst_offset < start_component) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + elements[chan] = LLVMBuildLoad(base->gallivm->builder, + ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], ""); + } + start_component = 0; + } else { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + elements[chan] = LLVMBuildLoad(base->gallivm->builder, + ctx->soa.outputs[register_index][chan], ""); + } + } + LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4); + LLVMValueRef args[4]; + args[0] = output; + args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component); + args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer); + args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component); + lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output", + LLVMVoidTypeInContext(base->gallivm->context), args, 4); + } + } + /* Add the necessary export instructions */ for (i = 0; i < ctx->output_reg_count; i++) { unsigned chan; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 410ffce28ba..ad8b91fb08b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1416,6 +1416,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); radeon_llvm_ctx.chip_class = ctx.bc->chip_class; radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN); + radeon_llvm_ctx.stream_outputs = &so; mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); if 
(debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { dump = 1; @@ -1572,7 +1573,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } /* Add stream outputs. */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { + if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) { unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; /* Sanity checking. */ diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index b8dc771226a..c5932282901 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -60,6 +60,7 @@ struct radeon_llvm_context { unsigned two_side; struct r600_shader_io * r600_inputs; struct r600_shader_io * r600_outputs; + struct pipe_stream_output_info *stream_outputs; unsigned color_buffer_count; unsigned fs_color_all; |