//===-- AMDILInliner.cpp - TODO: Add brief description -------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //==-----------------------------------------------------------------------===// #define DEBUG_TYPE "amdilinline" #include "AMDIL.h" #include "AMDILCompilerErrors.h" #include "AMDILMachineFunctionInfo.h" #include "AMDILSubtarget.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; namespace { class LLVM_LIBRARY_VISIBILITY AMDILInlinePass: public FunctionPass { public: TargetMachine &TM; static char ID; AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); ~AMDILInlinePass(); virtual const char* getPassName() const; virtual bool runOnFunction(Function &F); bool doInitialization(Module &M); bool doFinalization(Module &M); virtual void getAnalysisUsage(AnalysisUsage &AU) const; private: typedef DenseMap > InlinedArrayAllocasTy; bool AMDILInlineCallIfPossible(CallSite CS, const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas); CodeGenOpt::Level OptLevel; }; char AMDILInlinePass::ID = 0; } // anonymouse namespace namespace llvm { FunctionPass* createAMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) { return new AMDILInlinePass(tm AMDIL_OPT_LEVEL_VAR); } } // llvm namespace AMDILInlinePass::AMDILInlinePass(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) : FunctionPass(ID), TM(tm) { OptLevel = tm.getOptLevel(); } AMDILInlinePass::~AMDILInlinePass() { } bool AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS, const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); // Try to inline the function. Get the list of static allocas that were // inlined. SmallVector StaticAllocas; InlineFunctionInfo IFI; if (!InlineFunction(CS, IFI)) return false; DEBUG(errs() << " function " << Caller->getName() << ": inlined call to "<< Callee->getName() << "\n"); // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. if (Callee->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtectReq); else if (Callee->hasFnAttr(Attribute::StackProtect) && !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, // then we know that they have disjoint lifetimes and that we can merge them. // // There are many heuristics possible for merging these allocas, and the // different options have different tradeoffs. One thing that we *really* // don't want to hurt is SRoA: once inlining happens, often allocas are no // longer address taken and so they can be promoted. // // Our "solution" for that is to only merge allocas whose outermost type is an // array type. These are usually not promoted because someone is using a // variable index into them. These are also often the most important ones to // merge. // // A better solution would be to have real memory lifetime markers in the IR // and not have the inliner do any merging of allocas at all. This would // allow the backend to do proper stack slot coloring of all allocas that // *actually make it to the backend*, which is really what we want. // // Because we don't have this information, we do this simple and useful hack. // SmallPtrSet UsedAllocas; // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e; ++AllocaNo) { AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). const ArrayType *ATy = dyn_cast(AI->getAllocatedType()); if (ATy == 0 || AI->isArrayAllocation()) continue; // Get the list of all available allocas for this array type. SmallVector &AllocasForType = InlinedArrayAllocas[ATy]; // Loop over the allocas in AllocasForType to see if we can reuse one. Note // that we have to be careful not to reuse the same "available" alloca for // multiple different allocas that we just inlined, we use the 'UsedAllocas' // set to keep track of which "available" allocas are being used by this // function. Also, AllocasForType can be empty of course! bool MergedAwayAlloca = false; for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { AllocaInst *AvailableAlloca = AllocasForType[i]; // The available alloca has to be in the right function, not in some other // function in this SCC. if (AvailableAlloca->getParent() != AI->getParent()) continue; // If the inlined function already uses this alloca then we can't reuse // it. if (!UsedAllocas.insert(AvailableAlloca)) continue; // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! DEBUG(errs() << " ***MERGED ALLOCA: " << *AI); AI->replaceAllUsesWith(AvailableAlloca); AI->eraseFromParent(); MergedAwayAlloca = true; break; } // If we already nuked the alloca, we're done with it. if (MergedAwayAlloca) continue; // If we were unable to merge away the alloca either because there are no // allocas of the right type available or because we reused them all // already, remember that this alloca came from an inlined function and mark // it used so we don't reuse it for other allocas from this inline // operation. AllocasForType.push_back(AI); UsedAllocas.insert(AI); } return true; } bool AMDILInlinePass::runOnFunction(Function &MF) { Function *F = &MF; const AMDILSubtarget &STM = TM.getSubtarget(); if (STM.device()->isSupported(AMDILDeviceInfo::NoInline)) { return false; } const TargetData *TD = getAnalysisIfAvailable(); SmallVector CallSites; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallSite CS = CallSite(cast(I)); // If this isn't a call, or it is a call to an intrinsic, it can // never be inlined. if (CS.getInstruction() == 0 || isa(I)) continue; // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) continue; // We don't want to inline if we are recursive. if (CS.getCalledFunction() && CS.getCalledFunction()->getName() == MF.getName()) { AMDILMachineFunctionInfo *MFI = getAnalysis().getMF() .getInfo(); MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]); continue; } CallSites.push_back(CS); } } InlinedArrayAllocasTy InlinedArrayAllocas; bool Changed = false; for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { CallSite CS = CallSites[CSi]; Function *Callee = CS.getCalledFunction(); // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; // Attempt to inline the function... if (!AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas)) continue; Changed = true; } return Changed; } const char* AMDILInlinePass::getPassName() const { return "AMDIL Inline Function Pass"; } bool AMDILInlinePass::doInitialization(Module &M) { return false; } bool AMDILInlinePass::doFinalization(Module &M) { return false; } void AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); FunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); }