summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi/radeonsi_shader.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeonsi/radeonsi_shader.c')
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_shader.c | 565
1 files changed, 565 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
new file mode 100644
index 00000000000..50f2e39314f
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -0,0 +1,565 @@
+
+#include "gallivm/lp_bld_tgsi_action.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "radeon_llvm.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "radeonsi_pipe.h"
+#include "radeonsi_shader.h"
+#include "sid.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+
+/*
+static ps_remap_inputs(
+ struct tgsi_llvm_context * tl_ctx,
+ unsigned tgsi_index,
+ unsigned tgsi_chan)
+{
+ :
+}
+
+struct si_input
+{
+ struct list_head head;
+ unsigned tgsi_index;
+ unsigned tgsi_chan;
+ unsigned order;
+};
+*/
+
+
+/* Per-shader compilation state shared by the callbacks in this file. */
+struct si_shader_context
+{
+ /* si_shader_context() casts a struct lp_build_tgsi_context pointer (the
+  * one found at radeon_bld.soa.bld_base) straight to this struct, so this
+  * member presumably has to stay first -- confirm against radeon_llvm.h. */
+ struct radeon_llvm_context radeon_bld;
+ /* Driver context; read for vertex element and rasterizer state. */
+ struct r600_context *rctx;
+ /* Token parser initialised in si_pipe_shader_create() and walked by
+  * si_llvm_emit_epilogue(). */
+ struct tgsi_parse_context parse;
+ /* Set to shader->tokens by si_pipe_shader_create(). */
+ struct tgsi_token * tokens;
+ /* Shader being built; the epilogue records its inputs/outputs here. */
+ struct si_pipe_shader *shader;
+ unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
+/* unsigned num_inputs; */
+/* struct list_head inputs; */
+/* unsigned * input_mappings *//* From TGSI to SI hw */
+/* struct tgsi_shader_info info;*/
+};
+
+/* Recover the si_shader_context that a generic TGSI build context belongs
+ * to. This is a plain pointer cast, so it is only valid for build contexts
+ * that are embedded at the start of a si_shader_context. */
+static struct si_shader_context * si_shader_context(
+	struct lp_build_tgsi_context * bld_base)
+{
+	struct si_shader_context * ctx = (struct si_shader_context *)bld_base;
+
+	return ctx;
+}
+
+
+/* NOTE(review): none of the macros below are referenced in the visible part
+ * of this file. The names suggest interpolation parameter bases/offsets, but
+ * their meaning is not established here -- confirm before relying on them. */
+#define PERSPECTIVE_BASE 0
+#define LINEAR_BASE 9
+
+#define SAMPLE_OFFSET 0
+#define CENTER_OFFSET 2
+/* NOTE(review): "OFSET" looks like a misspelling of "OFFSET"; the spelling
+ * is left untouched here because renaming would change the identifier. */
+#define CENTROID_OFSET 4
+
+#define USE_SGPR_MAX_SUFFIX_LEN 5
+
+/* Kinds of value use_sgpr() can produce from an SGPR index. */
+enum sgpr_type {
+ /* i32 value read with llvm.SI.use.sgpr.i32 */
+ SGPR_I32,
+ /* i64 value read with llvm.SI.use.sgpr.i64 */
+ SGPR_I64,
+ /* i64 read converted to a pointer to <4 x i32> */
+ SGPR_PTR_V4I32,
+ /* i64 read converted to a pointer to <8 x i32> */
+ SGPR_PTR_V8I32
+};
+
+/* Emit a read of a scalar register and return it as the requested type.
+ *
+ * gallivm: LLVM state that receives the emitted call.
+ * type:    how the register contents should be presented (see sgpr_type).
+ * sgpr:    index passed verbatim to the llvm.SI.use.sgpr.* intrinsic.
+ *
+ * SGPR_I32 is read with the i32 intrinsic. Every other type is read with
+ * the i64 intrinsic; the pointer types additionally convert that integer
+ * into a pointer to an i32 vector. Returns NULL for an unknown type (after
+ * the i64 read has already been emitted). */
+static LLVMValueRef use_sgpr(
+	struct gallivm_state * gallivm,
+	enum sgpr_type type,
+	unsigned sgpr)
+{
+	LLVMValueRef index = lp_build_const_int32(gallivm, sgpr);
+	LLVMValueRef raw;
+	LLVMTypeRef ptr_type;
+	unsigned vec_width;
+
+	if (type == SGPR_I32) {
+		return lp_build_intrinsic_unary(gallivm->builder,
+				"llvm.SI.use.sgpr.i32",
+				LLVMInt32TypeInContext(gallivm->context),
+				index);
+	}
+
+	raw = lp_build_intrinsic_unary(gallivm->builder,
+			"llvm.SI.use.sgpr.i64",
+			LLVMInt64TypeInContext(gallivm->context),
+			index);
+
+	if (type == SGPR_I64) {
+		return raw;
+	}
+
+	/* Both pointer flavours differ only in the width of the pointee. */
+	if (type == SGPR_PTR_V4I32) {
+		vec_width = 4;
+	} else if (type == SGPR_PTR_V8I32) {
+		vec_width = 8;
+	} else {
+		assert(!"Unsupported SGPR type in use_sgpr()");
+		return NULL;
+	}
+
+	ptr_type = LLVMPointerType(
+			LLVMVectorType(LLVMInt32TypeInContext(gallivm->context),
+					vec_width),
+			0 /*XXX: Specify address space*/);
+
+	return LLVMBuildIntToPtr(gallivm->builder, raw, ptr_type, "");
+}
+
+/* Emit the IR that loads one vertex shader input and store its four
+ * channels in radeon_bld.inputs[].
+ *
+ * si_shader_ctx: compilation state; its rctx supplies the vertex elements.
+ * input_index:   TGSI input register index, also used to pick the vertex
+ *                element describing the attribute.
+ * decl:          unused here. */
+static void declare_input_vs(
+	struct si_shader_context * si_shader_ctx,
+	unsigned input_index,
+	const struct tgsi_full_declaration *decl)
+{
+	struct lp_build_tgsi_context * bld_base =
+		&si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	struct pipe_vertex_element *velem =
+		&si_shader_ctx->rctx->vertex_elements->elements[input_index];
+	LLVMValueRef fetch_args[4];
+	LLVMValueRef fetched;
+	unsigned comp;
+
+	/* XXX: Communicate with the rest of the driver about which SGPR the T#
+	 * list pointer is going to be stored in. Hard code to SGPR[0-1] for
+	 * now */
+	fetch_args[0] = use_sgpr(gallivm, SGPR_I64, 0);
+
+	/* Offset of the buffer descriptor within the T# list. */
+	fetch_args[1] = lp_build_const_int32(gallivm,
+			4 * velem->vertex_buffer_index);
+
+	/* Offset of the attribute within a vertex. */
+	fetch_args[2] = lp_build_const_int32(gallivm, velem->src_offset);
+
+	/* Load the buffer index, which is always stored in VGPR0
+	 * for Vertex Shaders */
+	fetch_args[3] = lp_build_intrinsic(gallivm->builder,
+			"llvm.SI.vs.load.buffer.index",
+			bld_base->uint_bld.elem_type, NULL, 0);
+
+	fetched = lp_build_intrinsic(gallivm->builder,
+			"llvm.SI.vs.load.input",
+			LLVMVectorType(bld_base->base.elem_type, 4),
+			fetch_args, 4);
+
+	/* Break up the vec4 into individual components */
+	/* XXX: Use a helper function for this. There is one in
+	 * tgsi_llvm.c. */
+	for (comp = 0; comp < 4; comp++) {
+		unsigned slot = radeon_llvm_reg_index_soa(input_index, comp);
+
+		si_shader_ctx->radeon_bld.inputs[slot] =
+			LLVMBuildExtractElement(gallivm->builder, fetched,
+					lp_build_const_int32(gallivm, comp), "");
+	}
+}
+
+/* Emit the IR that interpolates one fragment shader input and store its
+ * channels in radeon_bld.inputs[].
+ *
+ * si_shader_ctx: compilation state; its rctx supplies the rasterizer state.
+ * input_index:   TGSI input register index, passed on as attribute number.
+ * decl:          declaration whose Interpolate field selects the intrinsic.
+ *
+ * Interpolation modes other than COLOR, CONSTANT and LINEAR only print a
+ * warning and leave the inputs unset. */
+static void declare_input_fs(
+	struct si_shader_context * si_shader_ctx,
+	unsigned input_index,
+	const struct tgsi_full_declaration *decl)
+{
+	struct gallivm_state * gallivm =
+		si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	const char * interp_intrinsic;
+	LLVMValueRef interp_args[3];
+	LLVMTypeRef f32;
+	unsigned comp;
+
+	/* This value is:
+	 * [15:0] NewPrimMask (Bit mask for each quad. It is set if the
+	 *                     quad begins a new primitive. Bit 0 always needs
+	 *                     to be unset)
+	 * [31:16] ParamOffset
+	 */
+	LLVMValueRef prim_params = use_sgpr(gallivm, SGPR_I32, 6);
+
+	/* XXX: Is this the input_index? */
+	LLVMValueRef attr = lp_build_const_int32(gallivm, input_index);
+
+	/* XXX: Handle all possible interpolation modes */
+	switch (decl->Declaration.Interpolate) {
+	case TGSI_INTERPOLATE_COLOR:
+		/* Colors follow the rasterizer's flat shading setting. */
+		interp_intrinsic = si_shader_ctx->rctx->rasterizer->flatshade ?
+			"llvm.SI.fs.interp.constant" :
+			"llvm.SI.fs.interp.linear.center";
+		break;
+	case TGSI_INTERPOLATE_CONSTANT:
+		interp_intrinsic = "llvm.SI.fs.interp.constant";
+		break;
+	case TGSI_INTERPOLATE_LINEAR:
+		interp_intrinsic = "llvm.SI.fs.interp.linear.center";
+		break;
+	default:
+		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+		return;
+	}
+
+	f32 = LLVMFloatTypeInContext(gallivm->context);
+	interp_args[1] = attr;
+	interp_args[2] = prim_params;
+
+	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
+	for (comp = 0; comp < TGSI_NUM_CHANNELS; comp++) {
+		unsigned slot = radeon_llvm_reg_index_soa(input_index, comp);
+
+		interp_args[0] = lp_build_const_int32(gallivm, comp);
+		si_shader_ctx->radeon_bld.inputs[slot] =
+			lp_build_intrinsic(gallivm->builder, interp_intrinsic,
+					f32, interp_args, 3);
+	}
+}
+
+/* load_input callback: dispatch an input declaration to the vertex or
+ * fragment shader handler according to the shader type stored in the
+ * context. Other shader types only print a warning. */
+static void declare_input(
+	struct radeon_llvm_context * radeon_bld,
+	unsigned input_index,
+	const struct tgsi_full_declaration *decl)
+{
+	struct si_shader_context * ctx =
+		si_shader_context(&radeon_bld->soa.bld_base);
+
+	switch (ctx->type) {
+	case TGSI_PROCESSOR_VERTEX:
+		declare_input_vs(ctx, input_index, decl);
+		break;
+	case TGSI_PROCESSOR_FRAGMENT:
+		declare_input_fs(ctx, input_index, decl);
+		break;
+	default:
+		fprintf(stderr, "Warning: Unsupported shader type,\n");
+		break;
+	}
+}
+
+/* Fetch callback for TGSI_FILE_CONSTANT: emit an llvm.SI.load.const call for
+ * one channel of a constant register.
+ *
+ * bld_base: TGSI build context.
+ * reg:      source register; only Register.Index is read.
+ * type:     unused here.
+ * swizzle:  channel to load, added to the register's dword offset.
+ *
+ * Returns the loaded value, typed as the base build context's element type. */
+static LLVMValueRef fetch_constant(
+	struct lp_build_tgsi_context * bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type,
+	unsigned swizzle)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMValueRef buffer_ptr;
+	LLVMValueRef dword_offset;
+
+	/* XXX: Assume the pointer to the constant buffer is being stored in
+	 * SGPR[2:3] */
+	buffer_ptr = use_sgpr(gallivm, SGPR_I64, 1);
+
+	/* XXX: This assumes that the constant buffer is not packed, so
+	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
+	 * offset of 4. */
+	dword_offset = lp_build_const_int32(gallivm,
+			(reg->Register.Index * 4) + swizzle);
+
+	return lp_build_intrinsic_binary(gallivm->builder,
+			"llvm.SI.load.const", bld_base->base.elem_type,
+			buffer_ptr, dword_offset);
+}
+
+
+/* Prologue callback: pre-declare the SGPR read intrinsics so they carry the
+ * readnone attribute.
+ *
+ *   declare i32 @llvm.SI.use.sgpr.i32(i32)
+ *   declare i64 @llvm.SI.use.sgpr.i64(i32)
+ */
+static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	const struct {
+		const char * name;
+		LLVMTypeRef ret_type;
+	} sgpr_reads[] = {
+		{ "llvm.SI.use.sgpr.i32", i32 },
+		{ "llvm.SI.use.sgpr.i64", i64 },
+	};
+	unsigned n;
+
+	/* Both intrinsics take a single i32 argument (the SGPR index). */
+	for (n = 0; n < sizeof(sgpr_reads) / sizeof(sgpr_reads[0]); n++) {
+		LLVMValueRef fn = lp_declare_intrinsic(gallivm->module,
+				sgpr_reads[n].name, sgpr_reads[n].ret_type,
+				&i32, 1);
+
+		LLVMAddFunctionAttr(fn, LLVMReadNoneAttribute);
+	}
+}
+
+/* Epilogue callback: walk the shader's declarations, record its inputs and
+ * outputs in the r600_shader description and emit one llvm.SI.export call
+ * per declared output register.
+ *
+ * The most recent "primary" export (position for vertex shaders, color for
+ * everything else) is held back in last_args so that it can be emitted last,
+ * with the "last export" flag set.
+ *
+ * XXX: This is partially implemented for VS only at this point. It is not complete */
+static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
+	struct r600_shader * shader = &si_shader_ctx->shader->shader;
+	struct lp_build_context * base = &bld_base->base;
+	struct lp_build_context * uint =
+		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
+	LLVMValueRef last_args[9] = { 0 };
+	/* Next free MRT / PARAM export slot. These have to live across the
+	 * whole token walk: resetting them per token would send every color
+	 * and every parameter to slot 0. */
+	unsigned color_count = 0;
+	unsigned param_count = 0;
+
+	while (!tgsi_parse_end_of_tokens(parse)) {
+		/* XXX: component_bits controls which components of the output
+		 * registers actually get exported. (e.g bit 0 means export
+		 * X component, bit 1 means export Y component, etc.) I'm
+		 * hard coding this to 0xf for now. In the future, we might
+		 * want to do something else. */
+		unsigned component_bits = 0xf;
+		unsigned chan;
+		struct tgsi_full_declaration *d =
+			&parse->FullToken.FullDeclaration;
+		LLVMValueRef args[9];
+		unsigned target;
+		unsigned index;
+		int i;
+
+		tgsi_parse_token(parse);
+		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
+			continue;
+
+		/* Record the declaration for the rest of the driver. */
+		switch (d->Declaration.File) {
+		case TGSI_FILE_INPUT:
+			i = shader->ninput++;
+			shader->input[i].name = d->Semantic.Name;
+			shader->input[i].sid = d->Semantic.Index;
+			shader->input[i].interpolate = d->Declaration.Interpolate;
+			shader->input[i].centroid = d->Declaration.Centroid;
+			break;
+		case TGSI_FILE_OUTPUT:
+			i = shader->noutput++;
+			shader->output[i].name = d->Semantic.Name;
+			shader->output[i].sid = d->Semantic.Index;
+			shader->output[i].interpolate = d->Declaration.Interpolate;
+			break;
+		default:
+			break;
+		}
+
+		/* Only outputs are exported. */
+		if (d->Declaration.File != TGSI_FILE_OUTPUT)
+			continue;
+
+		for (index = d->Range.First; index <= d->Range.Last; index++) {
+			for (chan = 0; chan < 4; chan++ ) {
+				LLVMValueRef out_ptr =
+					si_shader_ctx->radeon_bld.soa.outputs
+					[index][chan];
+				/* +5 because the first output value will be
+				 * the 6th argument to the intrinsic. */
+				args[chan + 5]= LLVMBuildLoad(
+					base->gallivm->builder, out_ptr, "");
+			}
+
+			/* XXX: We probably need to keep track of the output
+			 * values, so we know what we are passing to the next
+			 * stage. */
+
+			/* Select the correct target */
+			switch(d->Semantic.Name) {
+			case TGSI_SEMANTIC_POSITION:
+				target = V_008DFC_SQ_EXP_POS;
+				break;
+			case TGSI_SEMANTIC_COLOR:
+				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
+					target = V_008DFC_SQ_EXP_PARAM + param_count;
+					param_count++;
+				} else {
+					target = V_008DFC_SQ_EXP_MRT + color_count;
+					color_count++;
+				}
+				break;
+			case TGSI_SEMANTIC_GENERIC:
+				target = V_008DFC_SQ_EXP_PARAM + param_count;
+				param_count++;
+				break;
+			default:
+				target = 0;
+				fprintf(stderr,
+					"Warning: SI unhandled output type:%d\n",
+					d->Semantic.Name);
+			}
+
+			/* Specify which components to enable */
+			args[0] = lp_build_const_int32(base->gallivm,
+						component_bits);
+
+			/* Specify whether the EXEC mask represents the valid mask */
+			args[1] = lp_build_const_int32(base->gallivm, 0);
+
+			/* Specify whether this is the last export */
+			args[2] = lp_build_const_int32(base->gallivm, 0);
+
+			/* Specify the target we are exporting */
+			args[3] = lp_build_const_int32(base->gallivm, target);
+
+			/* Set COMPR flag to zero to export data as 32-bit */
+			args[4] = uint->zero;
+
+			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
+			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
+			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
+				/* Primary export: flush the one held back so
+				 * far and hold this one back instead. */
+				if (last_args[0]) {
+					lp_build_intrinsic(base->gallivm->builder,
+						"llvm.SI.export",
+						LLVMVoidTypeInContext(base->gallivm->context),
+						last_args, 9);
+				}
+
+				memcpy(last_args, args, sizeof(args));
+			} else {
+				lp_build_intrinsic(base->gallivm->builder,
+					"llvm.SI.export",
+					LLVMVoidTypeInContext(base->gallivm->context),
+					args, 9);
+			}
+
+		}
+	}
+
+	/* Nothing was held back (no position / color output declared), so
+	 * last_args still contains NULL values and must not be handed to the
+	 * intrinsic builder.
+	 * XXX: Such shaders probably still need some final export. */
+	if (!last_args[0]) {
+		fprintf(stderr, "Warning: SI shader has no final export.\n");
+		return;
+	}
+
+	/* Specify whether the EXEC mask represents the valid mask */
+	last_args[1] = lp_build_const_int32(base->gallivm,
+			si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);
+
+	/* Specify that this is the last export */
+	last_args[2] = lp_build_const_int32(base->gallivm, 1);
+
+	lp_build_intrinsic(base->gallivm->builder,
+		"llvm.SI.export",
+		LLVMVoidTypeInContext(base->gallivm->context),
+		last_args, 9);
+
+/* XXX: Look up what this function does */
+/*	ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
+}
+
+/* fetch_args callback for TGSI_OPCODE_TEX: fill emit_data with the six
+ * arguments of llvm.SI.sample and set the result type to <4 x float>.
+ *
+ *   args[0]  destination write mask
+ *   args[1]  coordinates (all channels of source operand 0)
+ *   args[2]  resource descriptor list pointer
+ *   args[3]  offset of the resource descriptor (32 per sampler unit)
+ *   args[4]  sampler descriptor list pointer
+ *   args[5]  offset of the sampler descriptor (16 per sampler unit)
+ */
+static void tex_fetch_args(
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	/* TEX has two source operands: the coordinates in Src[0] and the
+	 * sampler unit in Src[1]. */
+	const int sampler_index = emit_data->inst->Src[1].Register.Index;
+
+	/* WriteMask */
+	emit_data->args[0] = lp_build_const_int32(gallivm,
+				emit_data->inst->Dst[0].Register.WriteMask);
+
+	/* Coordinates */
+	/* XXX: Not all sample instructions need 4 address arguments. */
+	emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+							0, LP_CHAN_ALL);
+
+	/* Resource */
+	emit_data->args[2] = use_sgpr(gallivm, SGPR_I64, 2);
+	emit_data->args[3] = lp_build_const_int32(gallivm,
+						32 * sampler_index);
+
+	/* Sampler */
+	/* NOTE(review): this reads the same SGPR index as fetch_constant()
+	 * does for the constant buffer pointer -- confirm that is intended. */
+	emit_data->args[4] = use_sgpr(gallivm, SGPR_I64, 1);
+	emit_data->args[5] = lp_build_const_int32(gallivm,
+						16 * sampler_index);
+
+	/* Dimensions */
+	/* XXX: We might want to pass this information to the shader at some
+	 * point. */
+/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
+					emit_data->inst->Texture.Texture);
+*/
+
+	emit_data->arg_count = 6;
+	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
+	 * the writemask are clear */
+	emit_data->dst_type = LLVMVectorType(
+			LLVMFloatTypeInContext(gallivm->context),
+			4);
+}
+
+/* Action installed for TGSI_OPCODE_TEX: tex_fetch_args() gathers the
+ * arguments and the generic intrinsic emitter calls llvm.SI.sample. */
+static const struct lp_build_tgsi_action tex_action = {
+	.intr_name = "llvm.SI.sample",
+	.fetch_args = tex_fetch_args,
+	.emit = lp_build_tgsi_intrinsic,
+};
+
+
+/* Translate a shader's TGSI tokens to SI machine code and upload it.
+ *
+ * ctx:    pipe context; cast to the driver's r600_context.
+ * shader: shader to build. Its tokens are compiled, num_sgprs, num_vgprs
+ *         and spi_ps_input_ena are filled from the compiled binary's
+ *         header, and, if shader->bo is still NULL, a buffer holding the
+ *         code is created.
+ *
+ * The TGSI tokens, the LLVM module and the generated code are dumped to
+ * stderr along the way.
+ *
+ * Returns 0 on success, -EINVAL if compilation produced no usable binary,
+ * -ENOMEM if the code buffer could not be created or mapped. */
+int si_pipe_shader_create(
+	struct pipe_context *ctx,
+	struct si_pipe_shader *shader)
+{
+	/* The binary starts with three dwords (SGPR count, VGPR count,
+	 * SPI_PS_INPUT_ENA); the machine code follows. */
+	const unsigned header_size = 12;
+	struct r600_context *rctx = (struct r600_context*)ctx;
+	struct si_shader_context si_shader_ctx;
+	struct tgsi_shader_info shader_info;
+	struct lp_build_tgsi_context * bld_base;
+	LLVMModuleRef mod;
+	unsigned char * inst_bytes = NULL;
+	unsigned inst_byte_count = 0;
+	unsigned i;
+	int r = 0;
+
+	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
+	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
+
+	tgsi_scan_shader(shader->tokens, &shader_info);
+	bld_base->info = &shader_info;
+	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
+	bld_base->emit_prologue = si_llvm_emit_prologue;
+	bld_base->emit_epilogue = si_llvm_emit_epilogue;
+
+	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
+
+	si_shader_ctx.radeon_bld.load_input = declare_input;
+	si_shader_ctx.tokens = shader->tokens;
+	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
+	si_shader_ctx.shader = shader;
+	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
+	si_shader_ctx.rctx = rctx;
+
+	shader->shader.nr_cbufs = rctx->nr_cbufs;
+
+	lp_build_tgsi_llvm(bld_base, shader->tokens);
+
+	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
+
+	mod = bld_base->base.gallivm->module;
+	tgsi_dump(shader->tokens, 0);
+	LLVMDumpModule(mod);
+	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", 1 /* dump */);
+
+	/* Everything below reads the header and subtracts its size from the
+	 * byte count, so refuse output that is missing or too short. */
+	if (inst_bytes == NULL || inst_byte_count < header_size) {
+		fprintf(stderr, "Error: SI shader compile produced no usable code.\n");
+		r = -EINVAL;
+		goto out;
+	}
+
+	fprintf(stderr, "SI CODE:\n");
+	/* Dump whole dwords only, so a trailing partial dword is never read
+	 * past the end of the buffer. */
+	for (i = 0; i + 3 < inst_byte_count; i+=4 ) {
+		fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
+			inst_bytes[i + 2], inst_bytes[i + 1],
+			inst_bytes[i]);
+	}
+
+	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
+	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
+	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
+
+	/* copy new shader */
+	if (shader->bo == NULL) {
+		const unsigned code_size = inst_byte_count - header_size;
+		uint32_t *ptr;
+
+		shader->bo = (struct r600_resource*)
+			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, inst_byte_count);
+		if (shader->bo == NULL) {
+			r = -ENOMEM;
+			goto out;
+		}
+		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->cs, PIPE_TRANSFER_WRITE);
+		if (ptr == NULL) {
+			/* Presumably a failed mapping is reported as NULL --
+			 * confirm against the winsys. Drop the buffer so a
+			 * later call does not mistake it for uploaded code. */
+			pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
+			r = -ENOMEM;
+			goto out;
+		}
+		if (0 /*R600_BIG_ENDIAN*/) {
+			for (i = 0; i < code_size / 4; ++i) {
+				ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes + header_size + i*4));
+			}
+		} else {
+			memcpy(ptr, inst_bytes + header_size, code_size);
+		}
+		rctx->ws->buffer_unmap(shader->bo->buf);
+	}
+
+out:
+	/* Shared exit: the parser state and the compiled binary are released
+	 * on every path, including the error ones. */
+	tgsi_parse_free(&si_shader_ctx.parse);
+	free(inst_bytes);
+
+	return r;
+}
+
+/* Release a shader's code buffer reference and clear its r600_shader
+ * description. ctx is unused. */
+void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
+{
+	struct pipe_resource **code_bo = (struct pipe_resource**)&shader->bo;
+
+	/* Referencing NULL drops the reference held in shader->bo. */
+	pipe_resource_reference(code_bo, NULL);
+
+	memset(&shader->shader, 0, sizeof(shader->shader));
+}