diff options
author | Tom Stellard <[email protected]> | 2012-05-16 15:15:35 -0400 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-05-29 11:55:52 -0400 |
commit | 467f51613eb1f2cdaa8624bbbb3d5fae2abca4f2 (patch) | |
tree | aa40f6a2b57e86ba885cbf667ef06e524b05767b | |
parent | 32b83e0366560a77798545880f980adc04b4361f (diff) |
radeonsi: Handle TGSI CONST registers
We now emit LLVM load instructions for TGSI CONST register reads,
which are lowered in the backend to S_LOAD_DWORD* instructions.
-rw-r--r-- | src/gallium/drivers/radeon/AMDILCodeEmitter.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp | 56 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/AMDILISelLowering.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600ISelLowering.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SICodeEmitter.cpp | 34 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIGenRegisterInfo.pl | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIISelLowering.cpp | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstrInfo.td | 62 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIInstructions.td | 46 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIIntrinsics.td | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/SIPropagateImmReads.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_shader.c | 125 |
12 files changed, 254 insertions, 100 deletions
diff --git a/src/gallium/drivers/radeon/AMDILCodeEmitter.h b/src/gallium/drivers/radeon/AMDILCodeEmitter.h index fa46cbd203d..0c7ae598367 100644 --- a/src/gallium/drivers/radeon/AMDILCodeEmitter.h +++ b/src/gallium/drivers/radeon/AMDILCodeEmitter.h @@ -37,6 +37,10 @@ namespace llvm { unsigned OpNo) const { return 0; } + virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) + const { + return 0; + } }; } // End namespace llvm diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp index b08e97b02b8..40b35fd45de 100644 --- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp +++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp @@ -67,6 +67,9 @@ private: SDNode *xformAtomicInst(SDNode *N); + bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); + bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); + // Include the pieces autogenerated from the target description. #include "AMDILGenDAGISel.inc" }; @@ -513,3 +516,56 @@ AMDILDAGToDAGISel::xformAtomicInst(SDNode *N) #undef INT64_C #endif #undef DEBUGTMP + +///==== AMDGPU Functions ====/// + +bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, + SDValue& Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + + + if (Addr.getOpcode() == ISD::ADD) { + bool Match = false; + + // Find the base ptr and the offset + for (unsigned i = 0; i < Addr.getNumOperands(); i++) { + SDValue Arg = Addr.getOperand(i); + ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg); + // This arg isn't a constant so it must be the base PTR. + if (!OffsetNode) { + Base = Addr.getOperand(i); + continue; + } + // Check if the constant argument fits in 8-bits. The offset is in bytes + // so we need to convert it to dwords. + if (isInt<8>(OffsetNode->getZExtValue() >> 2)) { + Match = true; + Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, + MVT::i32); + } + } + return Match; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, + SDValue& Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() != ISD::ADD) { + return false; + } + + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + + return false; +} diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index 27cf9b16efb..a52c83e5790 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -796,7 +796,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const setPrefLoopAlignment(16); setSelectIsExpensive(true); setJumpIsExpensive(true); - computeRegisterProperties(); maxStoresPerMemcpy = 4096; maxStoresPerMemmove = 4096; diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 77d90e21d1b..5926a08fa76 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -30,6 +30,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass); addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass); + computeRegisterProperties(); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); diff --git a/src/gallium/drivers/radeon/SICodeEmitter.cpp b/src/gallium/drivers/radeon/SICodeEmitter.cpp index 1db9764365a..d2ea1fb5dec 100644 --- a/src/gallium/drivers/radeon/SICodeEmitter.cpp +++ b/src/gallium/drivers/radeon/SICodeEmitter.cpp @@ -65,6 +65,9 @@ namespace { /// for an instruction in place of a register. virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo) const; + /// SMRDmemriEncode - Encoding for SMRD indexed loads + virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) + const; /// VOPPostEncode - Post-Encoder method for VOP instructions virtual uint64_t VOPPostEncode(const MachineInstr &MI, @@ -238,6 +241,37 @@ uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI, return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32); } +#define SMRD_OFFSET_MASK 0xff +#define SMRD_IMM_SHIFT 8 +#define SMRD_SBASE_MASK 0x3f +#define SMRD_SBASE_SHIFT 9 +/// SMRDmemriEncode - This function is responsibe for encoding the offset +/// and the base ptr for SMRD instructions it should return a bit string in +/// this format: +/// +/// OFFSET = bits{7-0} +/// IMM = bits{8} +/// SBASE = bits{14-9} +/// +uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI, + unsigned OpNo) const +{ + uint32_t encoding; + + const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1); + + //XXX: Use this function for SMRD loads with register offsets + assert(OffsetOp.isImm()); + + encoding = + (getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK) + | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit + | ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) + ; + + return encoding; +} + /// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR. /// XXX: It would be nice if we could handle this without a PostEncode function. uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI, diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl index bb5ebbd67e6..6c6cd45edeb 100644 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl @@ -182,8 +182,8 @@ my @subregs_64 = ('low', 'high'); my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w'); my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7'); -my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64', 'iPTRAny')); -my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32')); +my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64')); +my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32')); my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32')); my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64')); diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 2455b536f9f..a79aba9358b 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -25,9 +25,20 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : { addRegisterClass(MVT::v4f32, &AMDIL::VReg_128RegClass); addRegisterClass(MVT::f32, &AMDIL::VReg_32RegClass); + addRegisterClass(MVT::i32, &AMDIL::VReg_32RegClass); + addRegisterClass(MVT::i64, &AMDIL::VReg_64RegClass); + + addRegisterClass(MVT::v4i32, &AMDIL::SReg_128RegClass); + addRegisterClass(MVT::v8i32, &AMDIL::SReg_256RegClass); + + computeRegisterProperties(); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + + setOperationAction(ISD::ADD, MVT::i64, Legal); + setOperationAction(ISD::ADD, MVT::i32, Legal); + } MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td index 435948ff1de..30c9c3377ad 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ b/src/gallium/drivers/radeon/SIInstrInfo.td @@ -32,6 +32,21 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : field bits<64> Inst; } +class SIOperand <ValueType vt, dag opInfo>: Operand <vt> { + let EncoderMethod = "encodeOperand"; + let MIOperandInfo = opInfo; +} + +def IMM8bit : ImmLeaf < + i32, + [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] +>; + +def IMM12bit : ImmLeaf < + i16, + [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] +>; + class GPR4Align <RegisterClass rc> : Operand <vAny> { let EncoderMethod = "GPR4AlignEncode"; let MIOperandInfo = (ops rc:$reg); @@ -46,6 +61,19 @@ def i32Literal : Operand <i32> { let EncoderMethod = "i32LiteralEncode"; } +def SMRDmemrr : Operand<iPTR> { + let MIOperandInfo = (ops SReg_64, SReg_32); + let EncoderMethod = "GPR2AlignEncode"; +} + +def SMRDmemri : Operand<iPTR> { + let MIOperandInfo = (ops SReg_64, i32imm); + let EncoderMethod = "SMRDmemriEncode"; +} + +def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>; +def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>; + def EXP : Enc64< (outs), (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, @@ -196,9 +224,10 @@ class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : Enc32<outs, ins, asm, pattern> { bits<7> SDST; - bits<8> OFFSET; - bits<6> SBASE; - bits<1> IMM = 0; // Determined by subclasses + bits<15> PTR; + bits<8> OFFSET = PTR{7-0}; + bits<1> IMM = PTR{8}; + bits<6> SBASE = PTR{14-9}; let Inst{7-0} = OFFSET; let Inst{8} = IMM; @@ -420,14 +449,15 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU } /*XXX: We should be able to infer the imm bit based on the arg types */ -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass, + ValueType vt> { def _SGPR : SMRD < op, (outs dstClass:$dst), - (ins SReg_32:$offset, GPR2Align<SReg_64,i64>:$sbase), + (ins SMRDmemrr:$src0), asm, - [] + [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] > { let IMM = 0; } @@ -435,29 +465,13 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { def _IMM : SMRD < op, (outs dstClass:$dst), - (ins i32imm:$offset, GPR2Align<SReg_64,i64>:$sbase), + (ins SMRDmemri:$src0), asm, - [] + [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] > { let IMM = 1; } } -class SIOperand <ValueType vt, dag opInfo>: Operand <vt> { - let EncoderMethod = "encodeOperand"; - let MIOperandInfo = opInfo; -} - -def IMM8bit : ImmLeaf < - i32, - [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] ->; - -def IMM12bit : ImmLeaf < - i16, - [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] ->; - include "SIInstrFormats.td" - include "SIInstructions.td" diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index b6097ef1eeb..a77b8bd7c11 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -346,12 +346,10 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM //def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; //def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; -let mayLoad = 0, neverHasSideEffects = 1 in { - -defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; +defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32, f32>; //def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; //def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; //def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; //def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; @@ -359,8 +357,6 @@ defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; //def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; //def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; -} // End mayLoad, neverHasSideEffects - //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; @@ -866,29 +862,25 @@ def VS_LOAD_BUFFER_INDEX : InstSI < /* int_SI_vs_load_input */ def : Pat< - (int_SI_vs_load_input SReg_64:$tlst_sgpr, IMM8bit:$t_offset, IMM12bit:$attr_offset, + (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, VReg_32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, - VReg_32:$buf_idx_vgpr, - (S_LOAD_DWORDX4_IMM imm:$t_offset, SReg_64:$tlst_sgpr), - 0, 0, (i32 SREG_LIT_0)) + VReg_32:$buf_idx_vgpr, SReg_128:$tlst, + 0, 0, (i32 SREG_LIT_0)) >; -/* int_SI_load_const */ - -def : Pat < - (int_SI_load_const SReg_64:$const_ptr, IMM8bit:$offset), - (S_LOAD_DWORD_IMM imm:$offset, SReg_64:$const_ptr) +def : Pat< + (int_SI_use_sgprptrcf32 imm:$src0), + (USE_SGPR_64 imm:$src0) >; - - -/* XXX: Complete this pattern with some form of a scalar move immediate */ -/* -def : Pat < - (int_SI_load_const SReg_64:$const_ptr, imm:$offset), - (S_LOAD_DWORD_SGPR imm:$offset, SReg_64:$const_ptr) +def : Pat< + (int_SI_use_sgprptrci128 imm:$src0), + (USE_SGPR_64 imm:$src0) +>; +def : Pat< + (int_SI_use_sgprptrci256 imm:$src0), + (USE_SGPR_64 imm:$src0) >; -*/ /* int_SI_export */ def : Pat < @@ -900,11 +892,9 @@ def : Pat < /* int_SI_sample */ def : Pat < - (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_64:$rsrc, imm:$rsrc_offset, - SReg_64:$sampler, imm:$sampler_offset), + (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - (S_LOAD_DWORDX8_IMM imm:$rsrc_offset, SReg_64:$rsrc), /* Resource */ - (S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */ + SReg_256:$rsrc, SReg_128:$sampler) >; def CLAMP_SI : CLAMP<VReg_32>; diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td index 4e718355428..d8bf4fab3db 100644 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ b/src/gallium/drivers/radeon/SIIntrinsics.td @@ -18,11 +18,14 @@ let TargetPrefix = "SI", isTarget = 1 in { /* XXX: We may need a seperate intrinsic here for loading integer values */ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], []>; - def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], []> ; + def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ; - def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty]>; + def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>; def int_SI_use_sgpr : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>; - + class int_SI_use_sgprptr : Intrinsic <[llvm_anyptr_ty], [llvm_i32_ty], []>; + def int_SI_use_sgprptrcf32 : int_SI_use_sgprptr; + def int_SI_use_sgprptrci128 : int_SI_use_sgprptr; + def int_SI_use_sgprptrci256 : int_SI_use_sgprptr; /* Interpolation Intrinsics */ diff --git a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp index 09a28c605e2..39c9532d4de 100644 --- a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp +++ b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp @@ -57,6 +57,7 @@ bool SIPropagateImmReadsPass::runOnMachineFunction(MachineFunction &MF) switch (MI.getOpcode()) { case AMDIL::LOADCONST_f32: case AMDIL::LOADCONST_i32: + case AMDIL::LOADCONST_i64: break; default: continue; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index e3623e02f4f..4f995494d1f 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -66,54 +66,85 @@ static struct si_shader_context * si_shader_context( #define CENTROID_OFSET 4 #define USE_SGPR_MAX_SUFFIX_LEN 5 +#define CONST_ADDR_SPACE 2 enum sgpr_type { + SGPR_CONST_PTR_F32, + SGPR_CONST_PTR_V4I32, + SGPR_CONST_PTR_V8I32, SGPR_I32, - SGPR_I64, - SGPR_PTR_V4I32, - SGPR_PTR_V8I32 + SGPR_I64 }; +/** + * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad + * + * @param offset The offset parameter specifies the number of + * elements to offset, not the number of bytes or dwords. An element is the + * the type pointed to by the base_ptr parameter (e.g. int is the element of + * an int* pointer) + * + * When LLVM lowers the load instruction, it will convert the element offset + * into a dword offset automatically. + * + */ +static LLVMValueRef build_indexed_load( + struct gallivm_state * gallivm, + LLVMValueRef base_ptr, + LLVMValueRef offset) +{ + LLVMValueRef computed_ptr = LLVMBuildGEP( + gallivm->builder, base_ptr, &offset, 1, ""); + + return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); +} + +/* + * XXX: Instead of using an intrinsic to use a specific SGPR, we should be + * using load instructions. The loads should load from the USER_SGPR address + * space and use the sgpr index as the pointer. + */ static LLVMValueRef use_sgpr( struct gallivm_state * gallivm, enum sgpr_type type, unsigned sgpr) { LLVMValueRef sgpr_index; - LLVMValueRef sgpr_value; LLVMTypeRef ret_type; sgpr_index = lp_build_const_int32(gallivm, sgpr); - if (type == SGPR_I32) { + switch (type) { + case SGPR_CONST_PTR_F32: + ret_type = LLVMFloatTypeInContext(gallivm->context); + ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgprptrcf32.", + ret_type, sgpr_index); + case SGPR_I32: ret_type = LLVMInt32TypeInContext(gallivm->context); return lp_build_intrinsic_unary(gallivm->builder, "llvm.SI.use.sgpr.i32", ret_type, sgpr_index); - } - - ret_type = LLVMInt64TypeInContext(gallivm->context); - sgpr_value = lp_build_intrinsic_unary(gallivm->builder, + case SGPR_I64: + ret_type= LLVMInt64TypeInContext(gallivm->context); + return lp_build_intrinsic_unary(gallivm->builder, "llvm.SI.use.sgpr.i64", ret_type, sgpr_index); - - switch (type) { - case SGPR_I64: - return sgpr_value; - case SGPR_PTR_V4I32: + case SGPR_CONST_PTR_V4I32: ret_type = LLVMInt32TypeInContext(gallivm->context); ret_type = LLVMVectorType(ret_type, 4); - ret_type = LLVMPointerType(ret_type, - 0 /*XXX: Specify address space*/); - return LLVMBuildIntToPtr(gallivm->builder, sgpr_value, - ret_type, ""); - case SGPR_PTR_V8I32: + ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgprptrci128.", + ret_type, sgpr_index); + case SGPR_CONST_PTR_V8I32: ret_type = LLVMInt32TypeInContext(gallivm->context); ret_type = LLVMVectorType(ret_type, 8); - ret_type = LLVMPointerType(ret_type, - 0 /*XXX: Specify address space*/); - return LLVMBuildIntToPtr(gallivm->builder, sgpr_value, - ret_type, ""); + ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); + return lp_build_intrinsic_unary(gallivm->builder, + "llvm.SI.use.sgprptrci256.", + ret_type, sgpr_index); default: assert(!"Unsupported SGPR type in use_sgpr()"); return NULL; @@ -127,9 +158,10 @@ static void declare_input_vs( { LLVMValueRef t_list_ptr; LLVMValueRef t_offset; + LLVMValueRef t_list; LLVMValueRef attribute_offset; LLVMValueRef buffer_index_reg; - LLVMValueRef args[4]; + LLVMValueRef args[3]; LLVMTypeRef vec4_type; LLVMValueRef input; struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; @@ -138,13 +170,17 @@ static void declare_input_vs( struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; unsigned chan; + /* Load the T list */ /* XXX: Communicate with the rest of the driver about which SGPR the T# * list pointer is going to be stored in. Hard code to SGPR[6:7] for * now */ - t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 3); + t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3); + + t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index); - t_offset = lp_build_const_int32(base->gallivm, - 4 * velem->vertex_buffer_index); + t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset); + + /* Build the attribute offset */ attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset); /* Load the buffer index is always, which is always stored in VGPR0 @@ -153,12 +189,11 @@ static void declare_input_vs( "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0); vec4_type = LLVMVectorType(base->elem_type, 4); - args[0] = t_list_ptr; - args[1] = t_offset; - args[2] = attribute_offset; - args[3] = buffer_index_reg; + args[0] = t_list; + args[1] = attribute_offset; + args[2] = buffer_index_reg; input = lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.vs.load.input", vec4_type, args, 4); + "llvm.SI.vs.load.input", vec4_type, args, 3); /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { @@ -274,7 +309,7 @@ static LLVMValueRef fetch_constant( /* XXX: Assume the pointer to the constant buffer is being stored in * SGPR[0:1] */ - const_ptr = use_sgpr(base->gallivm, SGPR_I64, 0); + const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0); /* XXX: This assumes that the constant buffer is not packed, so * CONST[0].x will have an offset of 0 and CONST[1].x will have an @@ -282,8 +317,7 @@ static LLVMValueRef fetch_constant( offset = lp_build_const_int32(base->gallivm, (reg->Register.Index * 4) + swizzle); - return lp_build_intrinsic_binary(base->gallivm->builder, - "llvm.SI.load.const", base->elem_type, const_ptr, offset); + return build_indexed_load(base->gallivm, const_ptr, offset); } @@ -457,6 +491,9 @@ static void tex_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { + LLVMValueRef ptr; + LLVMValueRef offset; + /* WriteMask */ emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, emit_data->inst->Dst[0].Register.WriteMask); @@ -467,14 +504,18 @@ static void tex_fetch_args( 0, LP_CHAN_ALL); /* Resource */ - emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2); - emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm, - 8 * emit_data->inst->Src[1].Register.Index); + ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2); + offset = lp_build_const_int32(bld_base->base.gallivm, + 8 * emit_data->inst->Src[1].Register.Index); + emit_data->args[2] = build_indexed_load(bld_base->base.gallivm, + ptr, offset); /* Sampler */ - emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1); - emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm, - 4 * emit_data->inst->Src[1].Register.Index); + ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1); + offset = lp_build_const_int32(bld_base->base.gallivm, + 4 * emit_data->inst->Src[1].Register.Index); + emit_data->args[3] = build_indexed_load(bld_base->base.gallivm, + ptr, offset); /* Dimensions */ /* XXX: We might want to pass this information to the shader at some. */ @@ -482,7 +523,7 @@ static void tex_fetch_args( emit_data->inst->Texture.Texture); */ - emit_data->arg_count = 6; + emit_data->arg_count = 4; /* XXX: To optimize, we could use a float or v2f32, if the last bits of * the writemask are clear */ emit_data->dst_type = LLVMVectorType( |