diff options
-rw-r--r-- | src/gallium/drivers/radeon/AMDIL.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.cpp | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstructions.td | 40 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIIntrinsics.td | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_shader.c | 80 |
5 files changed, 57 insertions, 74 deletions
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h index 45638f64280..8bd024a4bd8 100644 --- a/src/gallium/drivers/radeon/AMDIL.h +++ b/src/gallium/drivers/radeon/AMDIL.h @@ -129,7 +129,8 @@ enum AddressSpaces { ADDRESS_NONE = 5, // Address space for unknown memory. PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1) - LAST_ADDRESS = 8 + USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI + LAST_ADDRESS = 9 }; // This union/struct combination is an easy way to read out the diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index a79aba9358b..1cdcd36c648 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -188,6 +188,9 @@ void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI, unsigned dstReg = MI->getOperand(0).getReg(); int64_t newIndex = MI->getOperand(1).getImm(); const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg); + unsigned DwordWidth = dstClass->getSize() / 4; + assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned"); + newIndex = newIndex / DwordWidth; unsigned newReg = dstClass->getRegister(newIndex); addLiveIn(MI, MF, MRI, TII, newReg); diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index a77b8bd7c11..fc8ec4a3394 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -7,6 +7,18 @@ // //===----------------------------------------------------------------------===// +def load_user_sgpr : PatFrag<(ops node:$ptr), + (load node:$ptr), + [{ + const Value *Src = cast<LoadSDNode>(N)->getSrcValue(); + if (Src) { + PointerType * PT = dyn_cast<PointerType>(Src->getType()); + return PT && PT->getAddressSpace() == AMDILAS::USER_SGPR_ADDRESS; + } + return false; + }] +>; + def isSI : Predicate<"Subtarget.device()" "->getGeneration() == AMDILDeviceInfo::HD7XXX">; @@ -826,26 +838,19 @@ def SI_INTERP_CONST : InstSI < imm:$attr, SReg_32:$params))] >; - def USE_SGPR_32 : InstSI < (outs SReg_32:$dst), (ins i32imm:$src0), "USE_SGPR_32", - [(set SReg_32:$dst, (int_SI_use_sgpr imm:$src0))] - -> { - field bits<32> Inst = 0; -} + [(set (i32 SReg_32:$dst), (load_user_sgpr imm:$src0))] +>; def USE_SGPR_64 : InstSI < (outs SReg_64:$dst), (ins i32imm:$src0), "USE_SGPR_64", - [(set SReg_64:$dst, (int_SI_use_sgpr imm:$src0))] - -> { - field bits<32> Inst = 0; -} + [(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))] +>; def VS_LOAD_BUFFER_INDEX : InstSI < (outs VReg_32:$dst), @@ -869,19 +874,6 @@ def : Pat< 0, 0, (i32 SREG_LIT_0)) >; -def : Pat< - (int_SI_use_sgprptrcf32 imm:$src0), - (USE_SGPR_64 imm:$src0) ->; -def : Pat< - (int_SI_use_sgprptrci128 imm:$src0), - (USE_SGPR_64 imm:$src0) ->; -def : Pat< - (int_SI_use_sgprptrci256 imm:$src0), - (USE_SGPR_64 imm:$src0) ->; - /* int_SI_export */ def : Pat < (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td index d8bf4fab3db..95273a2a68f 100644 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ b/src/gallium/drivers/radeon/SIIntrinsics.td @@ -21,11 +21,6 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ; def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>; - def int_SI_use_sgpr : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>; - class int_SI_use_sgprptr : Intrinsic <[llvm_anyptr_ty], [llvm_i32_ty], []>; - def int_SI_use_sgprptrcf32 : int_SI_use_sgprptr; - def int_SI_use_sgprptrci128 : int_SI_use_sgprptr; - def int_SI_use_sgprptrci256 : int_SI_use_sgprptr; /* Interpolation Intrinsics */ diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 4f995494d1f..e625a74338a 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -67,6 +67,7 @@ static struct si_shader_context * si_shader_context( #define USE_SGPR_MAX_SUFFIX_LEN 5 #define CONST_ADDR_SPACE 2 +#define USER_SGPR_ADDR_SPACE 8 enum sgpr_type { SGPR_CONST_PTR_F32, @@ -99,10 +100,19 @@ static LLVMValueRef build_indexed_load( return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); } -/* - * XXX: Instead of using an intrinsic to use a specific SGPR, we should be - * using load instructions. The loads should load from the USER_SGPR address - * space and use the sgpr index as the pointer. +/** + * Load a value stored in one of the user SGPRs + * + * @param sgpr This is the sgpr to load the value from. If you need to load a + * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer), + * then you should pass the index of the first SGPR that holds the value. For + * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then + * use pass 2 for the sgpr parameter. + * + * The value of the sgpr parameter must also be aligned to the width of the type + * being loaded, so that the sgpr parameter is divisible by the dword width of the + * type. For example, if the value being loaded is two dwords wide, then the sgpr + * parameter must be divisible by two. */ static LLVMValueRef use_sgpr( struct gallivm_state * gallivm, @@ -111,44 +121,48 @@ static LLVMValueRef use_sgpr( { LLVMValueRef sgpr_index; LLVMTypeRef ret_type; + LLVMValueRef ptr; sgpr_index = lp_build_const_int32(gallivm, sgpr); switch (type) { case SGPR_CONST_PTR_F32: + assert(sgpr % 2 == 0); ret_type = LLVMFloatTypeInContext(gallivm->context); ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); - return lp_build_intrinsic_unary(gallivm->builder, - "llvm.SI.use.sgprptrcf32.", - ret_type, sgpr_index); + break; + case SGPR_I32: ret_type = LLVMInt32TypeInContext(gallivm->context); - return lp_build_intrinsic_unary(gallivm->builder, - "llvm.SI.use.sgpr.i32", - ret_type, sgpr_index); + break; + case SGPR_I64: + assert(sgpr % 2 == 0); ret_type= LLVMInt64TypeInContext(gallivm->context); - return lp_build_intrinsic_unary(gallivm->builder, - "llvm.SI.use.sgpr.i64", - ret_type, sgpr_index); + break; + case SGPR_CONST_PTR_V4I32: + assert(sgpr % 2 == 0); ret_type = LLVMInt32TypeInContext(gallivm->context); ret_type = LLVMVectorType(ret_type, 4); ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); - return lp_build_intrinsic_unary(gallivm->builder, - "llvm.SI.use.sgprptrci128.", - ret_type, sgpr_index); + break; + case SGPR_CONST_PTR_V8I32: + assert(sgpr % 2 == 0); ret_type = LLVMInt32TypeInContext(gallivm->context); ret_type = LLVMVectorType(ret_type, 8); ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); - return lp_build_intrinsic_unary(gallivm->builder, - "llvm.SI.use.sgprptrci256.", - ret_type, sgpr_index); + break; + default: assert(!"Unsupported SGPR type in use_sgpr()"); return NULL; } + + ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE); + ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, ""); + return LLVMBuildLoad(gallivm->builder, ptr, ""); } static void declare_input_vs( @@ -174,7 +188,7 @@ static void declare_input_vs( /* XXX: Communicate with the rest of the driver about which SGPR the T# * list pointer is going to be stored in. Hard code to SGPR[6:7] for * now */ - t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3); + t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6); t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index); @@ -320,27 +334,6 @@ static LLVMValueRef fetch_constant( return build_indexed_load(base->gallivm, const_ptr, offset); } - -/* Declare some intrinsics with the correct attributes */ -static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base) -{ - LLVMValueRef function; - struct gallivm_state * gallivm = bld_base->base.gallivm; - - LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context); - LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); - - /* declare i32 @llvm.SI.use.sgpr.i32(i32) */ - function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32", - i32, &i32, 1); - LLVMAddFunctionAttr(function, LLVMReadNoneAttribute); - - /* declare i64 @llvm.SI.use.sgpr.i64(i32) */ - function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64", - i64, &i32, 1); - LLVMAddFunctionAttr(function, LLVMReadNoneAttribute); -} - /* XXX: This is partially implemented for VS only at this point. It is not complete */ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) { @@ -504,14 +497,14 @@ static void tex_fetch_args( 0, LP_CHAN_ALL); /* Resource */ - ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2); + ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4); offset = lp_build_const_int32(bld_base->base.gallivm, 8 * emit_data->inst->Src[1].Register.Index); emit_data->args[2] = build_indexed_load(bld_base->base.gallivm, ptr, offset); /* Sampler */ - ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1); + ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2); offset = lp_build_const_int32(bld_base->base.gallivm, 4 * emit_data->inst->Src[1].Register.Index); emit_data->args[3] = build_indexed_load(bld_base->base.gallivm, @@ -557,7 +550,6 @@ int si_pipe_shader_create( tgsi_scan_shader(shader->tokens, &shader_info); bld_base->info = &shader_info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; - bld_base->emit_prologue = si_llvm_emit_prologue; bld_base->emit_epilogue = si_llvm_emit_epilogue; bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; |