diff options
author | Tom Stellard <[email protected]> | 2012-05-16 15:15:35 -0400 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-05-29 11:55:52 -0400 |
commit | 467f51613eb1f2cdaa8624bbbb3d5fae2abca4f2 (patch) | |
tree | aa40f6a2b57e86ba885cbf667ef06e524b05767b /src/gallium/drivers/radeonsi/radeonsi_shader.c | |
parent | 32b83e0366560a77798545880f980adc04b4361f (diff) |
radeonsi: Handle TGSI CONST registers
We now emit LLVM load instructions for TGSI CONST register reads,
which are lowered in the backend to S_LOAD_DWORD* instructions.
Diffstat (limited to 'src/gallium/drivers/radeonsi/radeonsi_shader.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_shader.c | 125 |
1 files changed, 83 insertions, 42 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index e3623e02f4f..4f995494d1f 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -66,54 +66,85 @@ static struct si_shader_context * si_shader_context( #define CENTROID_OFSET 4 #define USE_SGPR_MAX_SUFFIX_LEN 5 +#define CONST_ADDR_SPACE 2 enum sgpr_type { + SGPR_CONST_PTR_F32, + SGPR_CONST_PTR_V4I32, + SGPR_CONST_PTR_V8I32, SGPR_I32, - SGPR_I64, - SGPR_PTR_V4I32, - SGPR_PTR_V8I32 + SGPR_I64 }; +/** + * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad + * + * @param offset The offset parameter specifies the number of + * elements to offset, not the number of bytes or dwords. An element is the + * the type pointed to by the base_ptr parameter (e.g. int is the element of + * an int* pointer) + * + * When LLVM lowers the load instruction, it will convert the element offset + * into a dword offset automatically. + * + */ +static LLVMValueRef build_indexed_load( + struct gallivm_state * gallivm, + LLVMValueRef base_ptr, + LLVMValueRef offset) +{ + LLVMValueRef computed_ptr = LLVMBuildGEP( + gallivm->builder, base_ptr, &offset, 1, ""); + + return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); +} + +/* + * XXX: Instead of using an intrinsic to use a specific SGPR, we should be + * using load instructions. The loads should load from the USER_SGPR address + * space and use the sgpr index as the pointer. + */ static LLVMValueRef use_sgpr( struct gallivm_state * gallivm, enum sgpr_type type, unsigned sgpr) { LLVMValueRef sgpr_index; - LLVMValueRef sgpr_value; LLVMTypeRef ret_type; sgpr_index = lp_build_const_int32(gallivm, sgpr); - if (type == SGPR_I32) { + switch (type) { + case SGPR_CONST_PTR_F32: + ret_type = LLVMFloatTypeInContext(gallivm->context); + ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgprptrcf32.", + ret_type, sgpr_index); + case SGPR_I32: ret_type = LLVMInt32TypeInContext(gallivm->context); return lp_build_intrinsic_unary(gallivm->builder, "llvm.SI.use.sgpr.i32", ret_type, sgpr_index); - } - - ret_type = LLVMInt64TypeInContext(gallivm->context); - sgpr_value = lp_build_intrinsic_unary(gallivm->builder, + case SGPR_I64: + ret_type= LLVMInt64TypeInContext(gallivm->context); + return lp_build_intrinsic_unary(gallivm->builder, "llvm.SI.use.sgpr.i64", ret_type, sgpr_index); - - switch (type) { - case SGPR_I64: - return sgpr_value; - case SGPR_PTR_V4I32: + case SGPR_CONST_PTR_V4I32: ret_type = LLVMInt32TypeInContext(gallivm->context); ret_type = LLVMVectorType(ret_type, 4); - ret_type = LLVMPointerType(ret_type, - 0 /*XXX: Specify address space*/); - return LLVMBuildIntToPtr(gallivm->builder, sgpr_value, - ret_type, ""); - case SGPR_PTR_V8I32: + ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgprptrci128.", + ret_type, sgpr_index); + case SGPR_CONST_PTR_V8I32: ret_type = LLVMInt32TypeInContext(gallivm->context); ret_type = LLVMVectorType(ret_type, 8); - ret_type = LLVMPointerType(ret_type, - 0 /*XXX: Specify address space*/); - return LLVMBuildIntToPtr(gallivm->builder, sgpr_value, - ret_type, ""); + ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgprptrci256.", + ret_type, sgpr_index); default: assert(!"Unsupported SGPR type in use_sgpr()"); return NULL; @@ -127,9 +158,10 @@ static void declare_input_vs( { LLVMValueRef t_list_ptr; LLVMValueRef t_offset; + LLVMValueRef t_list; LLVMValueRef attribute_offset; LLVMValueRef buffer_index_reg; - LLVMValueRef args[4]; + LLVMValueRef args[3]; LLVMTypeRef vec4_type; LLVMValueRef input; struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; @@ -138,13 +170,17 @@ static void declare_input_vs( struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; unsigned chan; + /* Load the T list */ /* XXX: Communicate with the rest of the driver about which SGPR the T# * list pointer is going to be stored in. Hard code to SGPR[6:7] for * now */ - t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 3); + t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3); + + t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index); - t_offset = lp_build_const_int32(base->gallivm, - 4 * velem->vertex_buffer_index); + t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset); + + /* Build the attribute offset */ attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset); /* Load the buffer index is always, which is always stored in VGPR0 @@ -153,12 +189,11 @@ static void declare_input_vs( "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0); vec4_type = LLVMVectorType(base->elem_type, 4); - args[0] = t_list_ptr; - args[1] = t_offset; - args[2] = attribute_offset; - args[3] = buffer_index_reg; + args[0] = t_list; + args[1] = attribute_offset; + args[2] = buffer_index_reg; input = lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.vs.load.input", vec4_type, args, 4); + "llvm.SI.vs.load.input", vec4_type, args, 3); /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { @@ -274,7 +309,7 @@ static LLVMValueRef fetch_constant( /* XXX: Assume the pointer to the constant buffer is being stored in * SGPR[0:1] */ - const_ptr = use_sgpr(base->gallivm, SGPR_I64, 0); + const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0); /* XXX: This assumes that the constant buffer is not packed, so * CONST[0].x will have an offset of 0 and CONST[1].x will have an @@ -282,8 +317,7 @@ static LLVMValueRef fetch_constant( offset = lp_build_const_int32(base->gallivm, (reg->Register.Index * 4) + swizzle); - return lp_build_intrinsic_binary(base->gallivm->builder, - "llvm.SI.load.const", base->elem_type, const_ptr, offset); + return build_indexed_load(base->gallivm, const_ptr, offset); } @@ -457,6 +491,9 @@ static void tex_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { + LLVMValueRef ptr; + LLVMValueRef offset; + /* WriteMask */ emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, emit_data->inst->Dst[0].Register.WriteMask); @@ -467,14 +504,18 @@ static void tex_fetch_args( 0, LP_CHAN_ALL); /* Resource */ - emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2); - emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm, - 8 * emit_data->inst->Src[1].Register.Index); + ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2); + offset = lp_build_const_int32(bld_base->base.gallivm, + 8 * emit_data->inst->Src[1].Register.Index); + emit_data->args[2] = build_indexed_load(bld_base->base.gallivm, + ptr, offset); /* Sampler */ - emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1); - emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm, - 4 * emit_data->inst->Src[1].Register.Index); + ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1); + offset = lp_build_const_int32(bld_base->base.gallivm, + 4 * emit_data->inst->Src[1].Register.Index); + emit_data->args[3] = build_indexed_load(bld_base->base.gallivm, + ptr, offset); /* Dimensions */ /* XXX: We might want to pass this information to the shader at some. */ @@ -482,7 +523,7 @@ static void tex_fetch_args( emit_data->inst->Texture.Texture); */ - emit_data->arg_count = 6; + emit_data->arg_count = 4; /* XXX: To optimize, we could use a float or v2f32, if the last bits of * the writemask are clear */ emit_data->dst_type = LLVMVectorType( |