diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi/radeonsi_shader.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_shader.c | 565 |
1 files changed, 565 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c new file mode 100644 index 00000000000..50f2e39314f --- /dev/null +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -0,0 +1,565 @@ + +#include "gallivm/lp_bld_tgsi_action.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_tgsi.h" +#include "radeon_llvm.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" + +#include "radeonsi_pipe.h" +#include "radeonsi_shader.h" +#include "sid.h" + +#include <assert.h> +#include <errno.h> +#include <stdio.h> + +/* +static ps_remap_inputs( + struct tgsi_llvm_context * tl_ctx, + unsigned tgsi_index, + unsigned tgsi_chan) +{ + : +} + +struct si_input +{ + struct list_head head; + unsigned tgsi_index; + unsigned tgsi_chan; + unsigned order; +}; +*/ + + +struct si_shader_context +{ + struct radeon_llvm_context radeon_bld; + struct r600_context *rctx; + struct tgsi_parse_context parse; + struct tgsi_token * tokens; + struct si_pipe_shader *shader; + unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */ +/* unsigned num_inputs; */ +/* struct list_head inputs; */ +/* unsigned * input_mappings *//* From TGSI to SI hw */ +/* struct tgsi_shader_info info;*/ +}; + +static struct si_shader_context * si_shader_context( + struct lp_build_tgsi_context * bld_base) +{ + return (struct si_shader_context *)bld_base; +} + + +#define PERSPECTIVE_BASE 0 +#define LINEAR_BASE 9 + +#define SAMPLE_OFFSET 0 +#define CENTER_OFFSET 2 +#define CENTROID_OFSET 4 + +#define USE_SGPR_MAX_SUFFIX_LEN 5 + +enum sgpr_type { + SGPR_I32, + SGPR_I64, + SGPR_PTR_V4I32, + SGPR_PTR_V8I32 +}; + +static LLVMValueRef use_sgpr( + struct gallivm_state * gallivm, + enum sgpr_type type, + unsigned sgpr) +{ + LLVMValueRef sgpr_index; + LLVMValueRef sgpr_value; + LLVMTypeRef ret_type; + + sgpr_index = lp_build_const_int32(gallivm, sgpr); + + if (type == SGPR_I32) { + ret_type = LLVMInt32TypeInContext(gallivm->context); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgpr.i32", + ret_type, sgpr_index); + } + + ret_type = LLVMInt64TypeInContext(gallivm->context); + sgpr_value = lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgpr.i64", + ret_type, sgpr_index); + + switch (type) { + case SGPR_I64: + return sgpr_value; + case SGPR_PTR_V4I32: + ret_type = LLVMInt32TypeInContext(gallivm->context); + ret_type = LLVMVectorType(ret_type, 4); + ret_type = LLVMPointerType(ret_type, + 0 /*XXX: Specify address space*/); + return LLVMBuildIntToPtr(gallivm->builder, sgpr_value, + ret_type, ""); + case SGPR_PTR_V8I32: + ret_type = LLVMInt32TypeInContext(gallivm->context); + ret_type = LLVMVectorType(ret_type, 8); + ret_type = LLVMPointerType(ret_type, + 0 /*XXX: Specify address space*/); + return LLVMBuildIntToPtr(gallivm->builder, sgpr_value, + ret_type, ""); + default: + assert(!"Unsupported SGPR type in use_sgpr()"); + return NULL; + } +} + +static void declare_input_vs( + struct si_shader_context * si_shader_ctx, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + LLVMValueRef t_list_ptr; + LLVMValueRef t_offset; + LLVMValueRef attribute_offset; + LLVMValueRef buffer_index_reg; + LLVMValueRef args[4]; + LLVMTypeRef vec4_type; + LLVMValueRef input; + struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; + struct r600_context *rctx = si_shader_ctx->rctx; + struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; + unsigned chan; + + /* XXX: Communicate with the rest of the driver about which SGPR the T# + * list pointer is going to be stored in. Hard code to SGPR[0-1] for + * now */ + t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 0); + + t_offset = lp_build_const_int32(base->gallivm, + 4 * velem->vertex_buffer_index); + attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset); + + /* Load the buffer index is always, which is always stored in VGPR0 + * for Vertex Shaders */ + buffer_index_reg = lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0); + + vec4_type = LLVMVectorType(base->elem_type, 4); + args[0] = t_list_ptr; + args[1] = t_offset; + args[2] = attribute_offset; + args[3] = buffer_index_reg; + input = lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.vs.load.input", vec4_type, args, 4); + + /* Break up the vec4 into individual components */ + for (chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan); + /* XXX: Use a helper function for this. There is one in + * tgsi_llvm.c. */ + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] = + LLVMBuildExtractElement(base->gallivm->builder, + input, llvm_chan, ""); + } +} + +static void declare_input_fs( + struct si_shader_context * si_shader_ctx, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + const char * intr_name; + unsigned chan; + struct lp_build_context * base = + &si_shader_ctx->radeon_bld.soa.bld_base.base; + struct gallivm_state * gallivm = base->gallivm; + + /* This value is: + * [15:0] NewPrimMask (Bit mask for each quad. It is set it the + * quad begins a new primitive. Bit 0 always needs + * to be unset) + * [32:16] ParamOffset + * + */ + LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6); + + + /* XXX: Is this the input_index? */ + LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index); + + /* XXX: Handle all possible interpolation modes */ + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_COLOR: + if (si_shader_ctx->rctx->rasterizer->flatshade) + intr_name = "llvm.SI.fs.interp.constant"; + else + intr_name = "llvm.SI.fs.interp.linear.center"; + break; + case TGSI_INTERPOLATE_CONSTANT: + intr_name = "llvm.SI.fs.interp.constant"; + break; + case TGSI_INTERPOLATE_LINEAR: + intr_name = "llvm.SI.fs.interp.linear.center"; + break; + default: + fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); + return; + } + + /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + LLVMValueRef args[3]; + LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); + unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); + LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); + args[0] = llvm_chan; + args[1] = attr_number; + args[2] = params; + si_shader_ctx->radeon_bld.inputs[soa_index] = + lp_build_intrinsic(gallivm->builder, intr_name, + input_type, args, 3); + } +} + +static void declare_input( + struct radeon_llvm_context * radeon_bld, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + struct si_shader_context * si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { + declare_input_vs(si_shader_ctx, input_index, decl); + } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { + declare_input_fs(si_shader_ctx, input_index, decl); + } else { + fprintf(stderr, "Warning: Unsupported shader type,\n"); + } +} + +static LLVMValueRef fetch_constant( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle) +{ + struct lp_build_context * base = &bld_base->base; + + LLVMValueRef const_ptr; + LLVMValueRef offset; + + /* XXX: Assume the pointer to the constant buffer is being stored in + * SGPR[2:3] */ + const_ptr = use_sgpr(base->gallivm, SGPR_I64, 1); + + /* XXX: This assumes that the constant buffer is not packed, so + * CONST[0].x will have an offset of 0 and CONST[1].x will have an + * offset of 4. */ + offset = lp_build_const_int32(base->gallivm, + (reg->Register.Index * 4) + swizzle); + + return lp_build_intrinsic_binary(base->gallivm->builder, + "llvm.SI.load.const", base->elem_type, const_ptr, offset); +} + + +/* Declare some intrinsics with the correct attributes */ +static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base) +{ + LLVMValueRef function; + struct gallivm_state * gallivm = bld_base->base.gallivm; + + LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context); + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + + /* declare i32 @llvm.SI.use.sgpr.i32(i32) */ + function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32", + i32, &i32, 1); + LLVMAddFunctionAttr(function, LLVMReadNoneAttribute); + + /* declare i64 @llvm.SI.use.sgpr.i64(i32) */ + function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64", + i64, &i32, 1); + LLVMAddFunctionAttr(function, LLVMReadNoneAttribute); +} + +/* XXX: This is partially implemented for VS only at this point. It is not complete */ +static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) +{ + struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); + struct r600_shader * shader = &si_shader_ctx->shader->shader; + struct lp_build_context * base = &bld_base->base; + struct lp_build_context * uint = + &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + struct tgsi_parse_context *parse = &si_shader_ctx->parse; + LLVMValueRef last_args[9] = { 0 }; + + while (!tgsi_parse_end_of_tokens(parse)) { + /* XXX: component_bits controls which components of the output + * registers actually get exported. (e.g bit 0 means export + * X component, bit 1 means export Y component, etc.) I'm + * hard coding this to 0xf for now. In the future, we might + * want to do something else. */ + unsigned component_bits = 0xf; + unsigned chan; + struct tgsi_full_declaration *d = + &parse->FullToken.FullDeclaration; + LLVMValueRef args[9]; + unsigned target; + unsigned index; + unsigned color_count = 0; + unsigned param_count = 0; + int i; + + tgsi_parse_token(parse); + if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) + continue; + + switch (d->Declaration.File) { + case TGSI_FILE_INPUT: + i = shader->ninput++; + shader->input[i].name = d->Semantic.Name; + shader->input[i].sid = d->Semantic.Index; + shader->input[i].interpolate = d->Declaration.Interpolate; + shader->input[i].centroid = d->Declaration.Centroid; + break; + case TGSI_FILE_OUTPUT: + i = shader->noutput++; + shader->output[i].name = d->Semantic.Name; + shader->output[i].sid = d->Semantic.Index; + shader->output[i].interpolate = d->Declaration.Interpolate; + break; + } + + if (d->Declaration.File != TGSI_FILE_OUTPUT) + continue; + + for (index = d->Range.First; index <= d->Range.Last; index++) { + for (chan = 0; chan < 4; chan++ ) { + LLVMValueRef out_ptr = + si_shader_ctx->radeon_bld.soa.outputs + [index][chan]; + /* +5 because the first output value will be + * the 6th argument to the intrinsic. */ + args[chan + 5]= LLVMBuildLoad( + base->gallivm->builder, out_ptr, ""); + } + + /* XXX: We probably need to keep track of the output + * values, so we know what we are passing to the next + * stage. */ + + /* Select the correct target */ + switch(d->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + target = V_008DFC_SQ_EXP_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { + target = V_008DFC_SQ_EXP_PARAM + param_count; + param_count++; + } else { + target = V_008DFC_SQ_EXP_MRT + color_count; + color_count++; + } + break; + case TGSI_SEMANTIC_GENERIC: + target = V_008DFC_SQ_EXP_PARAM + param_count; + param_count++; + break; + default: + target = 0; + fprintf(stderr, + "Warning: SI unhandled output type:%d\n", + d->Semantic.Name); + } + + /* Specify which components to enable */ + args[0] = lp_build_const_int32(base->gallivm, + component_bits); + + /* Specify whether the EXEC mask represents the valid mask */ + args[1] = lp_build_const_int32(base->gallivm, 0); + + /* Specify whether this is the last export */ + args[2] = lp_build_const_int32(base->gallivm, 0); + + /* Specify the target we are exporting */ + args[3] = lp_build_const_int32(base->gallivm, target); + + /* Set COMPR flag to zero to export data as 32-bit */ + args[4] = uint->zero; + + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ? + (d->Semantic.Name == TGSI_SEMANTIC_POSITION) : + (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) { + if (last_args[0]) { + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + last_args, 9); + } + + memcpy(last_args, args, sizeof(args)); + } else { + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9); + } + + } + } + + /* Specify whether the EXEC mask represents the valid mask */ + last_args[1] = lp_build_const_int32(base->gallivm, + si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); + + /* Specify that this is the last export */ + last_args[2] = lp_build_const_int32(base->gallivm, 1); + + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + last_args, 9); + +/* XXX: Look up what this function does */ +/* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/ +} + +static void tex_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* WriteMask */ + emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, + emit_data->inst->Dst[0].Register.WriteMask); + + /* Coordinates */ + /* XXX: Not all sample instructions need 4 address arguments. */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, LP_CHAN_ALL); + + /* Resource */ + emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2); + emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm, + 32 * emit_data->inst->Src[2].Register.Index); + + /* Sampler */ + emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1); + emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm, + 16 * emit_data->inst->Src[2].Register.Index); + + /* Dimensions */ + /* XXX: We might want to pass this information to the shader at some. */ +/* emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, + emit_data->inst->Texture.Texture); +*/ + + emit_data->arg_count = 6; + /* XXX: To optimize, we could use a float or v2f32, if the last bits of + * the writemask are clear */ + emit_data->dst_type = LLVMVectorType( + LLVMFloatTypeInContext(bld_base->base.gallivm->context), + 4); +} + +static const struct lp_build_tgsi_action tex_action = { + .fetch_args = tex_fetch_args, + .emit = lp_build_tgsi_intrinsic, + .intr_name = "llvm.SI.sample" +}; + + +int si_pipe_shader_create( + struct pipe_context *ctx, + struct si_pipe_shader *shader) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct si_shader_context si_shader_ctx; + struct tgsi_shader_info shader_info; + struct lp_build_tgsi_context * bld_base; + LLVMModuleRef mod; + unsigned char * inst_bytes; + unsigned inst_byte_count; + unsigned i; + + radeon_llvm_context_init(&si_shader_ctx.radeon_bld); + bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; + + tgsi_scan_shader(shader->tokens, &shader_info); + bld_base->info = &shader_info; + bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; + bld_base->emit_prologue = si_llvm_emit_prologue; + bld_base->emit_epilogue = si_llvm_emit_epilogue; + + bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; + + si_shader_ctx.radeon_bld.load_input = declare_input; + si_shader_ctx.tokens = shader->tokens; + tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); + si_shader_ctx.shader = shader; + si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; + si_shader_ctx.rctx = rctx; + + shader->shader.nr_cbufs = rctx->nr_cbufs; + + lp_build_tgsi_llvm(bld_base, shader->tokens); + + radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); + + mod = bld_base->base.gallivm->module; + tgsi_dump(shader->tokens, 0); + LLVMDumpModule(mod); + radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", 1 /* dump */); + fprintf(stderr, "SI CODE:\n"); + for (i = 0; i < inst_byte_count; i+=4 ) { + fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3], + inst_bytes[i + 2], inst_bytes[i + 1], + inst_bytes[i]); + } + + shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes); + shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4)); + shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8)); + + tgsi_parse_free(&si_shader_ctx.parse); + + /* copy new shader */ + if (shader->bo == NULL) { + uint32_t *ptr; + + shader->bo = (struct r600_resource*) + pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, inst_byte_count); + if (shader->bo == NULL) { + return -ENOMEM; + } + ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->cs, PIPE_TRANSFER_WRITE); + if (0 /*R600_BIG_ENDIAN*/) { + for (i = 0; i < (inst_byte_count-12)/4; ++i) { + ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4)); + } + } else { + memcpy(ptr, inst_bytes + 12, inst_byte_count - 12); + } + rctx->ws->buffer_unmap(shader->bo->buf); + } + + free(inst_bytes); + + return 0; +} + +void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) +{ + pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); + + memset(&shader->shader,0,sizeof(struct r600_shader)); +} |