Example #1
SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS   = Op.getOperand(0);
  SDValue RHS   = Op.getOperand(1);
  DebugLoc dl   = Op.getDebugLoc();

  // If we are doing an AND and testing against zero, then the CMP
  // will not be generated.  The AND (or BIT) will generate the condition codes,
  // but they are different from CMP.
  // FIXME: since we're doing a post-processing, use a pseudoinstr here, so
  // lowering & isel wouldn't diverge.
  bool andCC = false;
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    if (RHSC->isNullValue() && LHS.hasOneUse() &&
        (LHS.getOpcode() == ISD::AND ||
         (LHS.getOpcode() == ISD::TRUNCATE &&
          LHS.getOperand(0).getOpcode() == ISD::AND))) {
      andCC = true;
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDValue TargetCC;
  SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);

  // Get the condition codes directly from the status register, if its easy.
  // Otherwise a branch will be generated.  Note that the AND and BIT
  // instructions generate different flags than CMP, the carry bit can be used
  // for NE/EQ.
  bool Invert = false;
  bool Shift = false;
  bool Convert = true;
  switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
    Convert = false;
   case MSP430CC::COND_HS:
     // Res = SRW & 1, no processing is required
   case MSP430CC::COND_LO:
     // Res = ~(SRW & 1)
     Invert = true;
   case MSP430CC::COND_NE:
     if (andCC) {
       // C = ~Z, thus Res = SRW & 1, no processing is required
     } else {
       // Res = ~((SRW >> 1) & 1)
       Shift = true;
       Invert = true;
   case MSP430CC::COND_E:
     Shift = true;
     // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
     // Res = (SRW >> 1) & 1 is 1 word shorter.
  EVT VT = Op.getValueType();
  SDValue One  = DAG.getConstant(1, VT);
  if (Convert) {
    SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
                                    MVT::i16, Flag);
    if (Shift)
      // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
      SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
    SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
    if (Invert)
      SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
    return SR;
  } else {
    SDValue Zero = DAG.getConstant(0, VT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SmallVector<SDValue, 4> Ops;
    return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
Example #2
llvm::Constant *
CodeGenVTables::CreateVTableInitializer(const CXXRecordDecl *RD,
                                        const VTableComponent *Components, 
                                        unsigned NumComponents,
                                const VTableLayout::VTableThunkTy *VTableThunks,
                                        unsigned NumVTableThunks) {
  SmallVector<llvm::Constant *, 64> Inits;

  llvm::Type *Int8PtrTy = CGM.Int8PtrTy;
  llvm::Type *PtrDiffTy = 

  QualType ClassType = CGM.getContext().getTagDeclType(RD);
  llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor(ClassType);
  unsigned NextVTableThunkIndex = 0;
  llvm::Constant* PureVirtualFn = 0;

  for (unsigned I = 0; I != NumComponents; ++I) {
    VTableComponent Component = Components[I];

    llvm::Constant *Init = 0;

    switch (Component.getKind()) {
    case VTableComponent::CK_VCallOffset:
      Init = llvm::ConstantInt::get(PtrDiffTy, 
      Init = llvm::ConstantExpr::getIntToPtr(Init, Int8PtrTy);
    case VTableComponent::CK_VBaseOffset:
      Init = llvm::ConstantInt::get(PtrDiffTy, 
      Init = llvm::ConstantExpr::getIntToPtr(Init, Int8PtrTy);
    case VTableComponent::CK_OffsetToTop:
      Init = llvm::ConstantInt::get(PtrDiffTy, 
      Init = llvm::ConstantExpr::getIntToPtr(Init, Int8PtrTy);
    case VTableComponent::CK_RTTI:
      Init = llvm::ConstantExpr::getBitCast(RTTI, Int8PtrTy);
    case VTableComponent::CK_FunctionPointer:
    case VTableComponent::CK_CompleteDtorPointer:
    case VTableComponent::CK_DeletingDtorPointer: {
      GlobalDecl GD;
      // Get the right global decl.
      switch (Component.getKind()) {
        llvm_unreachable("Unexpected vtable component kind");
      case VTableComponent::CK_FunctionPointer:
        GD = Component.getFunctionDecl();
      case VTableComponent::CK_CompleteDtorPointer:
        GD = GlobalDecl(Component.getDestructorDecl(), Dtor_Complete);
      case VTableComponent::CK_DeletingDtorPointer:
        GD = GlobalDecl(Component.getDestructorDecl(), Dtor_Deleting);

      if (cast<CXXMethodDecl>(GD.getDecl())->isPure()) {
        // We have a pure virtual member function.
        if (!PureVirtualFn) {
          llvm::FunctionType *Ty = 
            llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
          StringRef PureCallName = CGM.getCXXABI().GetPureVirtualCallName();
          PureVirtualFn = CGM.CreateRuntimeFunction(Ty, PureCallName);
          PureVirtualFn = llvm::ConstantExpr::getBitCast(PureVirtualFn,
        Init = PureVirtualFn;
      } else {
        // Check if we should use a thunk.
        if (NextVTableThunkIndex < NumVTableThunks &&
            VTableThunks[NextVTableThunkIndex].first == I) {
          const ThunkInfo &Thunk = VTableThunks[NextVTableThunkIndex].second;
          MaybeEmitThunkAvailableExternally(GD, Thunk);
          Init = CGM.GetAddrOfThunk(GD, Thunk);

        } else {
          llvm::Type *Ty = CGM.getTypes().GetFunctionTypeForVTable(GD);
          Init = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);

        Init = llvm::ConstantExpr::getBitCast(Init, Int8PtrTy);

    case VTableComponent::CK_UnusedFunctionPointer:
      Init = llvm::ConstantExpr::getNullValue(Int8PtrTy);
  llvm::ArrayType *ArrayType = llvm::ArrayType::get(Int8PtrTy, NumComponents);
  return llvm::ConstantArray::get(ArrayType, Inits);
Example #3
static void emitImplicitValueConstructor(SILGenFunction &SGF,
                                         ConstructorDecl *ctor) {
  RegularLocation Loc(ctor);
  // FIXME: Handle 'self' along with the other arguments.
  auto *paramList = ctor->getParameters();
  auto *selfDecl = ctor->getImplicitSelfDecl();
  auto selfTyCan = selfDecl->getType();
  auto selfIfaceTyCan = selfDecl->getInterfaceType();
  SILType selfTy = SGF.getLoweredType(selfTyCan);

  // Emit the indirect return argument, if any.
  SILValue resultSlot;
  if (selfTy.isAddressOnly(SGF.SGM.M) && SGF.silConv.useLoweredAddresses()) {
    auto &AC = SGF.getASTContext();
    auto VD = new (AC) ParamDecl(VarDecl::Specifier::InOut,
                                 SourceLoc(), SourceLoc(),
    resultSlot = SGF.F.begin()->createFunctionArgument(selfTy, VD);

  // Emit the elementwise arguments.
  SmallVector<RValue, 4> elements;
  for (size_t i = 0, size = paramList->size(); i < size; ++i) {
    auto &param = paramList->get(i);

          SGF, Loc, param->getInterfaceType()->getCanonicalType(), ctor));

  emitConstructorMetatypeArg(SGF, ctor);

  auto *decl = selfTy.getStructOrBoundGenericStruct();
  assert(decl && "not a struct?!");

  // If we have an indirect return slot, initialize it in-place.
  if (resultSlot) {

    auto elti = elements.begin(), eltEnd = elements.end();
    for (VarDecl *field : decl->getStoredProperties()) {
      auto fieldTy = selfTy.getFieldType(field, SGF.SGM.M);
      auto &fieldTL = SGF.getTypeLowering(fieldTy);
      SILValue slot = SGF.B.createStructElementAddr(Loc, resultSlot, field,
      InitializationPtr init(new KnownAddressInitialization(slot));

      // An initialized 'let' property has a single value specified by the
      // initializer - it doesn't come from an argument.
      if (!field->isStatic() && field->isLet() &&
          field->getParentInitializer()) {
#ifndef NDEBUG
        auto fieldTy = decl->getDeclContext()->mapTypeIntoContext(
               && "Checked by sema");

        // Cleanup after this initialization.
        FullExpr scope(SGF.Cleanups, field->getParentPatternBinding());
        SGF.emitExprInto(field->getParentInitializer(), init.get());

      assert(elti != eltEnd && "number of args does not match number of fields");
      std::move(*elti).forwardInto(SGF, Loc, init.get());

  // Otherwise, build a struct value directly from the elements.
  SmallVector<SILValue, 4> eltValues;

  auto elti = elements.begin(), eltEnd = elements.end();
  for (VarDecl *field : decl->getStoredProperties()) {
    auto fieldTy = selfTy.getFieldType(field, SGF.SGM.M);
    SILValue v;

    // An initialized 'let' property has a single value specified by the
    // initializer - it doesn't come from an argument.
    if (!field->isStatic() && field->isLet() && field->getParentInitializer()) {
      // Cleanup after this initialization.
      FullExpr scope(SGF.Cleanups, field->getParentPatternBinding());
      v = SGF.emitRValue(field->getParentInitializer())
             .forwardAsSingleStorageValue(SGF, fieldTy, Loc);
    } else {
      assert(elti != eltEnd && "number of args does not match number of fields");
      v = std::move(*elti).forwardAsSingleStorageValue(SGF, fieldTy, Loc);


  SILValue selfValue = SGF.B.createStruct(Loc, selfTy, eltValues);
/// Insert monomorphic inline caches for a specific class or metatype
/// type \p SubClassTy.
static FullApplySite speculateMonomorphicTarget(FullApplySite AI,
                                                SILType SubType,
                                                CheckedCastBranchInst *&CCBI) {
  CCBI = nullptr;
  // Bail if this class_method cannot be devirtualized.
  if (!canDevirtualizeClassMethod(AI, SubType))
    return FullApplySite();

  // Create a diamond shaped control flow and a checked_cast_branch
  // instruction that checks the exact type of the object.
  // This cast selects between two paths: one that calls the slow dynamic
  // dispatch and one that calls the specific method.
  auto It = AI.getInstruction()->getIterator();
  SILFunction *F = AI.getFunction();
  SILBasicBlock *Entry = AI.getParent();

  // Iden is the basic block containing the direct call.
  SILBasicBlock *Iden = F->createBasicBlock();
  // Virt is the block containing the slow virtual call.
  SILBasicBlock *Virt = F->createBasicBlock();

  SILBasicBlock *Continue = Entry->splitBasicBlock(It);

  SILBuilderWithScope Builder(Entry, AI.getInstruction());
  // Create the checked_cast_branch instruction that checks at runtime if the
  // class instance is identical to the SILType.

  ClassMethodInst *CMI = cast<ClassMethodInst>(AI.getCallee());

  CCBI = Builder.createCheckedCastBranch(AI.getLoc(), /*exact*/ true,
                                       CMI->getOperand(), SubType, Iden,
  It = CCBI->getIterator();

  SILBuilderWithScope VirtBuilder(Virt, AI.getInstruction());
  SILBuilderWithScope IdenBuilder(Iden, AI.getInstruction());
  // This is the class reference downcasted into subclass SubType.
  SILValue DownCastedClassInstance = Iden->getBBArg(0);

  // Copy the two apply instructions into the two blocks.
  FullApplySite IdenAI = CloneApply(AI, IdenBuilder);
  FullApplySite VirtAI = CloneApply(AI, VirtBuilder);

  // See if Continue has a release on self as the instruction right after the
  // apply. If it exists, move it into position in the diamond.
  if (auto *Release =
          dyn_cast<StrongReleaseInst>(std::next(Continue->begin()))) {
    if (Release->getOperand() == CMI->getOperand()) {
      VirtBuilder.createStrongRelease(Release->getLoc(), CMI->getOperand(),
          Release->getLoc(), DownCastedClassInstance, Atomicity::Atomic);

  // Create a PHInode for returning the return value from both apply
  // instructions.
  SILArgument *Arg = Continue->createBBArg(AI.getType());
  if (!isa<TryApplyInst>(AI)) {
    IdenBuilder.createBranch(AI.getLoc(), Continue,
    VirtBuilder.createBranch(AI.getLoc(), Continue,

  // Remove the old Apply instruction.
  assert(AI.getInstruction() == &Continue->front() &&
         "AI should be the first instruction in the split Continue block");
  if (!isa<TryApplyInst>(AI)) {
    assert(!Continue->empty() &&
           "There should be at least a terminator after AI");
  } else {
    assert(Continue->empty() &&
           "There should not be an instruction after try_apply");

  // Update the stats.

  // Devirtualize the apply instruction on the identical path.
  auto NewInstPair = devirtualizeClassMethod(IdenAI, DownCastedClassInstance);
  assert(NewInstPair.first && "Expected to be able to devirtualize apply!");
  replaceDeadApply(IdenAI, NewInstPair.first);

  // Split critical edges resulting from VirtAI.
  if (auto *TAI = dyn_cast<TryApplyInst>(VirtAI)) {
    auto *ErrorBB = TAI->getFunction()->createBasicBlock();
    Builder.createBranch(TAI->getLoc(), TAI->getErrorBB(),

    auto *NormalBB = TAI->getFunction()->createBasicBlock();
    Builder.createBranch(TAI->getLoc(), TAI->getNormalBB(),
                        {NormalBB->getBBArg(0) });

    SmallVector<SILValue, 4> Args;
    for (auto Arg : VirtAI.getArguments()) {
    FullApplySite NewVirtAI = Builder.createTryApply(VirtAI.getLoc(), VirtAI.getCallee(),
        VirtAI.getSubstCalleeSILType(), VirtAI.getSubstitutions(),
        Args, NormalBB, ErrorBB);
    VirtAI = NewVirtAI;

  return VirtAI;
/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              DomTreeNodeCompare> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
       E = DefBlocks.end(); I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));

  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode*, 32> Visited;
  SmallVector<DomTreeNode*, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree. Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.


    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)

        if (!Visited.insert(SuccNode))

        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))

  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
Example #6
/// VerifyIndirectJumps - Verify whether any possible indirect jump
/// might cross a protection boundary.  Unlike direct jumps, indirect
/// jumps count cleanups as protection boundaries:  since there's no
/// way to know where the jump is going, we can't implicitly run the
/// right cleanups the way we can with direct jumps.
/// Thus, an indirect jump is "trivial" if it bypasses no
/// initializations and no teardowns.  More formally, an indirect jump
/// from A to B is trivial if the path out from A to DCA(A,B) is
/// trivial and the path in from DCA(A,B) to B is trivial, where
/// DCA(A,B) is the deepest common ancestor of A and B.
/// Jump-triviality is transitive but asymmetric.
/// A path in is trivial if none of the entered scopes have an InDiag.
/// A path out is trivial is none of the exited scopes have an OutDiag.
/// Under these definitions, this function checks that the indirect
/// jump between A and B is trivial for every indirect goto statement A
/// and every label B whose address was taken in the function.
void JumpScopeChecker::VerifyIndirectJumps() {
  if (IndirectJumps.empty()) return;

  // If there aren't any address-of-label expressions in this function,
  // complain about the first indirect goto.
  if (IndirectJumpTargets.empty()) {

  // Collect a single representative of every scope containing an
  // indirect goto.  For most code bases, this substantially cuts
  // down on the number of jump sites we'll have to consider later.
  typedef std::pair<unsigned, IndirectGotoStmt*> JumpScope;
  SmallVector<JumpScope, 32> JumpScopes;
    llvm::DenseMap<unsigned, IndirectGotoStmt*> JumpScopesMap;
    for (SmallVectorImpl<IndirectGotoStmt*>::iterator
           I = IndirectJumps.begin(), E = IndirectJumps.end(); I != E; ++I) {
      IndirectGotoStmt *IG = *I;
      assert(LabelAndGotoScopes.count(IG) &&
             "indirect jump didn't get added to scopes?");
      unsigned IGScope = LabelAndGotoScopes[IG];
      IndirectGotoStmt *&Entry = JumpScopesMap[IGScope];
      if (!Entry) Entry = IG;
    for (llvm::DenseMap<unsigned, IndirectGotoStmt*>::iterator
           I = JumpScopesMap.begin(), E = JumpScopesMap.end(); I != E; ++I)

  // Collect a single representative of every scope containing a
  // label whose address was taken somewhere in the function.
  // For most code bases, there will be only one such scope.
  llvm::DenseMap<unsigned, LabelDecl*> TargetScopes;
  for (SmallVectorImpl<LabelDecl*>::iterator
         I = IndirectJumpTargets.begin(), E = IndirectJumpTargets.end();
       I != E; ++I) {
    LabelDecl *TheLabel = *I;
    assert(LabelAndGotoScopes.count(TheLabel->getStmt()) &&
           "Referenced label didn't get added to scopes?");
    unsigned LabelScope = LabelAndGotoScopes[TheLabel->getStmt()];
    LabelDecl *&Target = TargetScopes[LabelScope];
    if (!Target) Target = TheLabel;

  // For each target scope, make sure it's trivially reachable from
  // every scope containing a jump site.
  // A path between scopes always consists of exitting zero or more
  // scopes, then entering zero or more scopes.  We build a set of
  // of scopes S from which the target scope can be trivially
  // entered, then verify that every jump scope can be trivially
  // exitted to reach a scope in S.
  llvm::BitVector Reachable(Scopes.size(), false);
  for (llvm::DenseMap<unsigned,LabelDecl*>::iterator
         TI = TargetScopes.begin(), TE = TargetScopes.end(); TI != TE; ++TI) {
    unsigned TargetScope = TI->first;
    LabelDecl *TargetLabel = TI->second;


    // Mark all the enclosing scopes from which you can safely jump
    // into the target scope.  'Min' will end up being the index of
    // the shallowest such scope.
    unsigned Min = TargetScope;
    while (true) {

      // Don't go beyond the outermost scope.
      if (Min == 0) break;

      // Stop if we can't trivially enter the current scope.
      if (Scopes[Min].InDiag) break;

      Min = Scopes[Min].ParentScope;

    // Walk through all the jump sites, checking that they can trivially
    // reach this label scope.
    for (SmallVectorImpl<JumpScope>::iterator
           I = JumpScopes.begin(), E = JumpScopes.end(); I != E; ++I) {
      unsigned Scope = I->first;

      // Walk out the "scope chain" for this scope, looking for a scope
      // we've marked reachable.  For well-formed code this amortizes
      // to O(JumpScopes.size() / Scopes.size()):  we only iterate
      // when we see something unmarked, and in well-formed code we
      // mark everything we iterate past.
      bool IsReachable = false;
      while (true) {
        if (Reachable.test(Scope)) {
          // If we find something reachable, mark all the scopes we just
          // walked through as reachable.
          for (unsigned S = I->first; S != Scope; S = Scopes[S].ParentScope)
          IsReachable = true;

        // Don't walk out if we've reached the top-level scope or we've
        // gotten shallower than the shallowest reachable scope.
        if (Scope == 0 || Scope < Min) break;

        // Don't walk out through an out-diagnostic.
        if (Scopes[Scope].OutDiag) break;

        Scope = Scopes[Scope].ParentScope;

      // Only diagnose if we didn't find something.
      if (IsReachable) continue;

      DiagnoseIndirectJump(I->second, I->first, TargetLabel, TargetScope);
/// \brief Collect the set of header includes needed to construct the given 
/// module and update the TopHeaders file set of the module.
/// \param Module The module we're collecting includes from.
/// \param Includes Will be augmented with the set of \#includes or \#imports
/// needed to load all of the named headers.
static std::error_code
collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
                            ModuleMap &ModMap, clang::Module *Module,
                            SmallVectorImpl<char> &Includes) {
  // Don't collect any headers for unavailable modules.
  if (!Module->isAvailable())
    return std::error_code();

  // Add includes for each of these headers.
  for (Module::Header &H : Module->Headers[Module::HK_Normal]) {
    // Use the path as specified in the module map file. We'll look for this
    // file relative to the module build directory (the directory containing
    // the module map file) so this will find the same file that we found
    // while parsing the module map.
    if (std::error_code Err = addHeaderInclude(H.NameAsWritten, Includes,
                                               LangOpts, Module->IsExternC))
      return Err;
  // Note that Module->PrivateHeaders will not be a TopHeader.

  if (Module::Header UmbrellaHeader = Module->getUmbrellaHeader()) {
    if (Module->Parent) {
      // Include the umbrella header for submodules.
      if (std::error_code Err = addHeaderInclude(UmbrellaHeader.NameAsWritten,
                                                 Includes, LangOpts,
        return Err;
  } else if (Module::DirectoryName UmbrellaDir = Module->getUmbrellaDir()) {
    // Add all of the headers we find in this subdirectory.
    std::error_code EC;
    SmallString<128> DirNative;
    llvm::sys::path::native(UmbrellaDir.Entry->getName(), DirNative);
    for (llvm::sys::fs::recursive_directory_iterator Dir(DirNative, EC), 
         Dir != DirEnd && !EC; Dir.increment(EC)) {
      // Check whether this entry has an extension typically associated with 
      // headers.
      if (!llvm::StringSwitch<bool>(llvm::sys::path::extension(Dir->path()))
          .Cases(".h", ".H", ".hh", ".hpp", true)

      const FileEntry *Header = FileMgr.getFile(Dir->path());
      // FIXME: This shouldn't happen unless there is a file system race. Is
      // that worth diagnosing?
      if (!Header)

      // If this header is marked 'unavailable' in this module, don't include 
      // it.
      if (ModMap.isHeaderUnavailableInModule(Header, Module))

      // Compute the relative path from the directory to this file.
      SmallVector<StringRef, 16> Components;
      auto PathIt = llvm::sys::path::rbegin(Dir->path());
      for (int I = 0; I != Dir.level() + 1; ++I, ++PathIt)
      SmallString<128> RelativeHeader(UmbrellaDir.NameAsWritten);
      for (auto It = Components.rbegin(), End = Components.rend(); It != End;
        llvm::sys::path::append(RelativeHeader, *It);

      // Include this header as part of the umbrella directory.
      if (std::error_code Err = addHeaderInclude(RelativeHeader, Includes,
                                                 LangOpts, Module->IsExternC))
        return Err;

    if (EC)
      return EC;

  // Recurse into submodules.
  for (clang::Module::submodule_iterator Sub = Module->submodule_begin(),
                                      SubEnd = Module->submodule_end();
       Sub != SubEnd; ++Sub)
    if (std::error_code Err = collectModuleHeaderIncludes(
            LangOpts, FileMgr, ModMap, *Sub, Includes))
      return Err;

  return std::error_code();
Example #8
CallGraphNode* ArgumentRecovery::recoverArguments(llvm::CallGraphNode *node)
	Function* fn = node->getFunction();
	if (fn == nullptr)
		// "theoretical nodes", whatever that is
		return nullptr;
	// quick exit if there isn't exactly one argument
	if (fn->arg_size() != 1)
		return nullptr;
	Argument* fnArg = fn->arg_begin();
	if (!isStructType(fnArg))
		return nullptr;
	// This is a nasty NASTY hack that relies on the AA pass being RegisterUse.
	// The data should be moved to a separate helper pass that can be queried from both the AA pass and this one.
	RegisterUse& regUse = getAnalysis<RegisterUse>();
	CallGraph& cg = getAnalysis<CallGraphWrapperPass>().getCallGraph();
	const auto* modRefInfo = regUse.getModRefInfo(fn);
	assert(modRefInfo != nullptr);
	// At this point we pretty much know that we're going to modify the function, so start doing that.
	// Get register offsets from the old function before we start mutilating it.
	auto& registerMap = exposeAllRegisters(fn);
	// Create a new function prototype, asking RegisterUse for which registers should be passed in, and how.
	LLVMContext& ctx = fn->getContext();
	SmallVector<pair<const char*, Type*>, 16> parameters;
	Type* int64 = Type::getInt64Ty(ctx);
	Type* int64ptr = Type::getInt64PtrTy(ctx);
	for (const auto& pair : *modRefInfo)
		if (pair.second != RegisterUse::NoModRef)
			Type* paramType = (pair.second & RegisterUse::Mod) == RegisterUse::Mod ? int64ptr : int64;
			parameters.push_back({pair.first, paramType});
	// Order parameters.
	// FIXME: This could use an ABI-specific sort routine. For now, use a lexicographical sort.
	sort(parameters.begin(), parameters.end(), [](const pair<const char*, Type*>& a, const pair<const char*, Type*>& b) {
		return strcmp(a.first, b.first) < 0;
	// Extract parameter types.
	SmallVector<Type*, 16> parameterTypes;
	for (const auto& pair : parameters)
	// Ideally, we would also do caller analysis here to figure out which output registers are never read, such that
	// we can either eliminate them from the parameter list or pass them by value instead of by address.
	// We would also pick a return value.
	FunctionType* newFunctionType = FunctionType::get(Type::getVoidTy(ctx), parameterTypes, false);

	Function* newFunc = Function::Create(newFunctionType, fn->getLinkage());
	fn->getParent()->getFunctionList().insert(fn, newFunc);
	fn->setName("__hollow_husk__" + newFunc->getName());
	// Set argument names
	size_t i = 0;
	for (Argument& arg : newFunc->args())
	// update call graph
	CallGraphNode* newFuncNode = cg.getOrInsertFunction(newFunc);
	CallGraphNode* oldFuncNode = cg[fn];
	// loop over callers and transform call sites.
	while (!fn->use_empty())
		CallSite cs(fn->user_back());
		Instruction* call = cast<CallInst>(cs.getInstruction());
		Function* caller = call->getParent()->getParent();
		auto& registerPositions = exposeAllRegisters(caller);
		SmallVector<Value*, 16> callParameters;
		for (const auto& pair : parameters)
			// HACKHACK: find a pointer to a 64-bit int in the set.
			Value* registerPointer = nullptr;
			auto range = registerPositions.equal_range(pair.first);
			for (auto iter = range.first; iter != range.second; iter++)
				if (auto gep = dyn_cast<GetElementPtrInst>(iter->second))
				if (gep->getResultElementType() == int64)
					registerPointer = gep;
			assert(registerPointer != nullptr);
			if (isa<PointerType>(pair.second))
				// Create a load instruction. GVN will get rid of it if it's unnecessary.
				LoadInst* load = new LoadInst(registerPointer, pair.first, call);
		CallInst* newCall = CallInst::Create(newFunc, callParameters, "", call);
		// Update AA
		regUse.replaceWithNewValue(call, newCall);
		// Update call graph
		CallGraphNode* calleeNode = cg[caller];
		calleeNode->replaceCallEdge(cs, CallSite(newCall), newFuncNode);
		// Finish replacing
		if (!call->use_empty())
	// Do not fix functions without a body.
	if (!fn->isDeclaration())
		// Fix up function code. Start by moving everything into the new function.
		newFunc->getBasicBlockList().splice(newFunc->begin(), fn->getBasicBlockList());
		// Change register uses
		size_t argIndex = 0;
		auto& argList = newFunc->getArgumentList();
		// Create a temporary insertion point. We don't want an existing instruction since chances are that we'll remove it.
		Instruction* insertionPoint = BinaryOperator::CreateAdd(ConstantInt::get(int64, 0), ConstantInt::get(int64, 0), "noop", newFunc->begin()->begin());
		for (auto iter = argList.begin(); iter != argList.end(); iter++, argIndex++)
			Value* replaceWith = iter;
			const auto& paramTuple = parameters[argIndex];
			if (!isa<PointerType>(paramTuple.second))
				// Create an alloca, copy value from parameter, replace GEP with alloca.
				// This is ugly code gen, but it will optimize easily, and still work if
				// we need a pointer reference to the register.
				auto alloca = new AllocaInst(paramTuple.second, paramTuple.first, insertionPoint);
				new StoreInst(iter, alloca, insertionPoint);
				replaceWith = alloca;
			// Replace all uses with new instance.
			auto iterPair = registerMap.equal_range(paramTuple.first);
			for (auto registerMapIter = iterPair.first; registerMapIter != iterPair.second; registerMapIter++)
				auto& registerValue = registerMapIter->second;
				registerValue = replaceWith;
		// At this point, the uses of the argument struct left should be:
		// * preserved registers
		// * indirect jumps
		const auto& target = getAnalysis<TargetInfo>();
		while (!fnArg->use_empty())
			auto lastUser = fnArg->user_back();
			if (auto user = dyn_cast<GetElementPtrInst>(lastUser))
				// Promote register to alloca.
				const char* maybeName = target.registerName(*user);
				const char* regName = target.largestOverlappingRegister(maybeName);
				assert(regName != nullptr);
				auto alloca = new AllocaInst(user->getResultElementType(), regName, insertionPoint);
				auto call = cast<CallInst>(lastUser);
				Function* intrin = nullptr;
				StringRef intrinName = call->getCalledFunction()->getName();
				if (intrinName == "x86_jump_intrin")
					intrin = indirectJump;
				else if (intrinName == "x86_call_intrin")
					intrin = indirectCall;
					// Can't decompile this function. Delete its body.
					insertionPoint = nullptr;
				// Replace intrinsic with another intrinsic.
				Value* jumpTarget = call->getOperand(2);
				SmallVector<Value*, 16> callArgs;
				for (Argument& arg : argList)
				CallInst* varargCall = CallInst::Create(intrin, callArgs, "", call);
				newFuncNode->replaceCallEdge(CallSite(call), CallSite(varargCall), cg[intrin]);
				regUse.replaceWithNewValue(call, varargCall);
		if (insertionPoint != nullptr)
			// no longer needed
	// At this point nothing should be using the old register argument anymore. (Pray!)
	// Leave the hollow husk of the old function in place to be erased by global DCE.
	registerAddresses[newFunc] = move(registerMap);
	// Should be all.
	return newFuncNode;
Example #9
void StackAllocationPromoter::fixBranchesAndUses(BlockSet &PhiBlocks) {
  // First update uses of the value.
  SmallVector<LoadInst *, 4> collectedLoads;
  for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E;) {
    auto *Inst = UI->getUser();
    bool removedUser = false;

    collectLoads(Inst, collectedLoads);
    for (LoadInst *LI : collectedLoads) {
      SILValue Def;
      // If this block has no predecessors then nothing dominates it and
      // the instruction is unreachable. If the instruction we're
      // examining is a value, replace it with undef. Either way, delete
      // the instruction and move on.
      SILBasicBlock *BB = LI->getParent();
      Def = getLiveInValue(PhiBlocks, BB);

      LLVM_DEBUG(llvm::dbgs() << "*** Replacing " << *LI
                              << " with Def " << *Def);

      // Replace the load with the definition that we found.
      replaceLoad(LI, Def, ASI);
      removedUser = true;

    if (removedUser)

    // If this block has no predecessors then nothing dominates it and
    // the instruction is unreachable. Delete the instruction and move
    // on.
    SILBasicBlock *BB = Inst->getParent();

    if (auto *DVAI = dyn_cast<DebugValueAddrInst>(Inst)) {
      // Replace DebugValueAddr with DebugValue.
      SILValue Def = getLiveInValue(PhiBlocks, BB);
      promoteDebugValueAddr(DVAI, Def, B);

    // Replace destroys with a release of the value.
    if (auto *DAI = dyn_cast<DestroyAddrInst>(Inst)) {
      SILValue Def = getLiveInValue(PhiBlocks, BB);
      replaceDestroy(DAI, Def);

  // Now that all of the uses are fixed we can fix the branches that point
  // to the blocks with the added arguments.

  // For each Block with a new Phi argument:
  for (auto Block : PhiBlocks) {
    // Fix all predecessors.
    for (auto PBBI = Block->getPredecessorBlocks().begin(),
              E = Block->getPredecessorBlocks().end();
         PBBI != E;) {
      auto *PBB = *PBBI;
      assert(PBB && "Invalid block!");
      fixPhiPredBlock(PhiBlocks, Block, PBB);
Example #10
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
    Instruction *TheCall = CS.getInstruction();
    assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
           "Instruction not in function!");

    // If IFI has any state in it, zap it before we fill it in.

    const Function *CalledFunc = CS.getCalledFunction();
    if (CalledFunc == 0 ||          // Can't inline external function or indirect
            CalledFunc->isDeclaration() || // call, or call to a vararg function!
            CalledFunc->getFunctionType()->isVarArg()) return false;

    // If the call to the callee is not a tail call, we must clear the 'tail'
    // flags on any calls that we inline.
    bool MustClearTailCallFlags =
        !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

    // If the call to the callee cannot throw, set the 'nounwind' flag on any
    // calls that we inline.
    bool MarkNoUnwind = CS.doesNotThrow();

    BasicBlock *OrigBB = TheCall->getParent();
    Function *Caller = OrigBB->getParent();

    // GC poses two hazards to inlining, which only occur when the callee has GC:
    //  1. If the caller has no GC, then the callee's GC must be propagated to the
    //     caller.
    //  2. If the caller has a differing GC, it is invalid to inline.
    if (CalledFunc->hasGC()) {
        if (!Caller->hasGC())
        else if (CalledFunc->getGC() != Caller->getGC())
            return false;

    // Get the personality function from the callee if it contains a landing pad.
    Value *CalleePersonality = 0;
    for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
            I != E; ++I)
        if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
            const BasicBlock *BB = II->getUnwindDest();
            const LandingPadInst *LP = BB->getLandingPadInst();
            CalleePersonality = LP->getPersonalityFn();

    // Find the personality function used by the landing pads of the caller. If it
    // exists, then check to see that it matches the personality function used in
    // the callee.
    if (CalleePersonality) {
        for (Function::const_iterator I = Caller->begin(), E = Caller->end();
                I != E; ++I)
            if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
                const BasicBlock *BB = II->getUnwindDest();
                const LandingPadInst *LP = BB->getLandingPadInst();

                // If the personality functions match, then we can perform the
                // inlining. Otherwise, we can't inline.
                // TODO: This isn't 100% true. Some personality functions are proper
                //       supersets of others and can be used in place of the other.
                if (LP->getPersonalityFn() != CalleePersonality)
                    return false;


    // Get an iterator to the last basic block in the function, which will have
    // the new function inlined after it.
    Function::iterator LastBlock = &Caller->back();

    // Make sure to capture all of the return instructions from the cloned
    // function.
    SmallVector<ReturnInst*, 8> Returns;
    ClonedCodeInfo InlinedFunctionInfo;
    Function::iterator FirstNewBlock;

    {   // Scope to destroy VMap after cloning.
        ValueToValueMapTy VMap;

        assert(CalledFunc->arg_size() == CS.arg_size() &&
               "No varargs calls can be inlined!");

        // Calculate the vector of arguments to pass into the function cloner, which
        // matches up the formal to the actual argument values.
        CallSite::arg_iterator AI = CS.arg_begin();
        unsigned ArgNo = 0;
        for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
                E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
            Value *ActualArg = *AI;

            // When byval arguments actually inlined, we need to make the copy implied
            // by them explicit.  However, we don't do this if the callee is readonly
            // or readnone, because the copy would be unneeded: the callee doesn't
            // modify the struct.
            if (CS.isByValArgument(ArgNo)) {
                ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,

                // Calls that we inline may use the new alloca, so we need to clear
                // their 'tail' flags if HandleByValArgument introduced a new alloca and
                // the callee has calls.
                MustClearTailCallFlags |= ActualArg != *AI;

            VMap[I] = ActualArg;

        // We want the inliner to prune the code as it copies.  We would LOVE to
        // have no dead or constant instructions leftover after inlining occurs
        // (which can happen, e.g., because an argument was constant), but we'll be
        // happy with whatever the cloner can do.
        CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
                                  /*ModuleLevelChanges=*/false, Returns, ".i",
                                  &InlinedFunctionInfo, IFI.DL, TheCall);

        // Remember the first block that is newly cloned over.
        FirstNewBlock = LastBlock;

        // Update the callgraph if requested.
        if (IFI.CG)
            UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

        // Update inlined instructions' line number information.
        fixupLineNumbers(Caller, FirstNewBlock, TheCall);

    // If there are any alloca instructions in the block that used to be the entry
    // block for the callee, move them to the entry block of the caller.  First
    // calculate which instruction they should be inserted before.  We insert the
    // instructions at the end of the current alloca list.
        BasicBlock::iterator InsertPoint = Caller->begin()->begin();
        for (BasicBlock::iterator I = FirstNewBlock->begin(),
                E = FirstNewBlock->end(); I != E; ) {
            AllocaInst *AI = dyn_cast<AllocaInst>(I++);
            if (AI == 0) continue;

            // If the alloca is now dead, remove it.  This often occurs due to code
            // specialization.
            if (AI->use_empty()) {

            if (!isa<Constant>(AI->getArraySize()))

            // Keep track of the static allocas that we inline into the caller.

            // Scan for the block of allocas that we can move over, and move them
            // all at once.
            while (isa<AllocaInst>(I) &&
                    isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {

            // Transfer all of the allocas over in a block.  Using splice means
            // that the instructions aren't removed from the symbol table, then
            // reinserted.
                    AI, I);

    // Leave lifetime markers for the static alloca's, scoping them to the
    // function we just inlined.
    if (InsertLifetime && !IFI.StaticAllocas.empty()) {
        IRBuilder<> builder(FirstNewBlock->begin());
        for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
            AllocaInst *AI = IFI.StaticAllocas[ai];

            // If the alloca is already scoped to something smaller than the whole
            // function then there's no need to add redundant, less accurate markers.
            if (hasLifetimeMarkers(AI))

            // Try to determine the size of the allocation.
            ConstantInt *AllocaSize = 0;
            if (ConstantInt *AIArraySize =
                        dyn_cast<ConstantInt>(AI->getArraySize())) {
                if (IFI.DL) {
                    Type *AllocaType = AI->getAllocatedType();
                    uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
                    uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
                    assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
                    // Check that array size doesn't saturate uint64_t and doesn't
                    // overflow when it's multiplied by type size.
                    if (AllocaArraySize != ~0ULL &&
                            UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
                        AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                                      AllocaArraySize * AllocaTypeSize);

            builder.CreateLifetimeStart(AI, AllocaSize);
            for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
                IRBuilder<> builder(Returns[ri]);
                builder.CreateLifetimeEnd(AI, AllocaSize);

    // If the inlined code contained dynamic alloca instructions, wrap the inlined
    // code with llvm.stacksave/llvm.stackrestore intrinsics.
    if (InlinedFunctionInfo.ContainsDynamicAllocas) {
        Module *M = Caller->getParent();
        // Get the two intrinsics we care about.
        Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
        Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

        // Insert the llvm.stacksave.
        CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
                             .CreateCall(StackSave, "savedstack");

        // Insert a call to llvm.stackrestore before any return instructions in the
        // inlined function.
        for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
            IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);

    // If we are inlining tail call instruction through a call site that isn't
    // marked 'tail', we must remove the tail marker for any calls in the inlined
    // code.  Also, calls inlined through a 'nounwind' call site should be marked
    // 'nounwind'.
    if (InlinedFunctionInfo.ContainsCalls &&
            (MustClearTailCallFlags || MarkNoUnwind)) {
        for (Function::iterator BB = FirstNewBlock, E = Caller->end();
                BB != E; ++BB)
            for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
                if (CallInst *CI = dyn_cast<CallInst>(I)) {
                    if (MustClearTailCallFlags)
                    if (MarkNoUnwind)

    // If we are inlining for an invoke instruction, we must make sure to rewrite
    // any call instructions into invoke instructions.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
        HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

    // If we cloned in _exactly one_ basic block, and if that block ends in a
    // return instruction, we splice the body of the inlined callee directly into
    // the calling basic block.
    if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
        // Move all of the instructions right before the call.
        OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                     FirstNewBlock->begin(), FirstNewBlock->end());
        // Remove the cloned basic block.

        // If the call site was an invoke instruction, add a branch to the normal
        // destination.
        if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
            BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

        // If the return instruction returned a value, replace uses of the call with
        // uses of the returned value.
        if (!TheCall->use_empty()) {
            ReturnInst *R = Returns[0];
            if (TheCall == R->getReturnValue())
        // Since we are now done with the Call/Invoke, we can delete it.

        // Since we are now done with the return instruction, delete it also.

        // We are now done with the inlining.
        return true;

    // Otherwise, we have the normal case, of more than one block to inline or
    // multiple return sites.

    // We want to clone the entire callee function into the hole between the
    // "starter" and "ender" blocks.  How we accomplish this depends on whether
    // this is an invoke instruction or a call instruction.
    BasicBlock *AfterCallBB;
    BranchInst *CreatedBranchToNormalDest = NULL;
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

        // Add an unconditional branch to make this look like the CallInst case...
        CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);

        // Split the basic block.  This guarantees that no PHI nodes will have to be
        // updated due to new incoming edges, and make the invoke case more
        // symmetric to the call case.
        AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,

    } else {  // It's a call
        // If this is a call instruction, we need to split the basic block that
        // the call lives in.
        AfterCallBB = OrigBB->splitBasicBlock(TheCall,

    // Change the branch that used to go to AfterCallBB to branch to the first
    // basic block of the inlined function.
    TerminatorInst *Br = OrigBB->getTerminator();
    assert(Br && Br->getOpcode() == Instruction::Br &&
           "splitBasicBlock broken!");
    Br->setOperand(0, FirstNewBlock);

    // Now that the function is correct, make it a little bit nicer.  In
    // particular, move the basic blocks inserted from the end of the function
    // into the space made by splitting the source basic block.
    Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                       FirstNewBlock, Caller->end());

    // Handle all of the return instructions that we just cloned in, and eliminate
    // any users of the original call/invoke instruction.
    Type *RTy = CalledFunc->getReturnType();

    PHINode *PHI = 0;
    if (Returns.size() > 1) {
        // The PHI node should go at the front of the new basic block to merge all
        // possible incoming values.
        if (!TheCall->use_empty()) {
            PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
            // Anything that used the result of the function call should now use the
            // PHI node as their operand.

        // Loop over all of the return instructions adding entries to the PHI node
        // as appropriate.
        if (PHI) {
            for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
                ReturnInst *RI = Returns[i];
                assert(RI->getReturnValue()->getType() == PHI->getType() &&
                       "Ret value not consistent in function!");
                PHI->addIncoming(RI->getReturnValue(), RI->getParent());

        // Add a branch to the merge points and remove return instructions.
        DebugLoc Loc;
        for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
            ReturnInst *RI = Returns[i];
            BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
            Loc = RI->getDebugLoc();
        // We need to set the debug location to *somewhere* inside the
        // inlined function. The line number may be nonsensical, but the
        // instruction will at least be associated with the right
        // function.
        if (CreatedBranchToNormalDest)
    } else if (!Returns.empty()) {
        // Otherwise, if there is exactly one return value, just replace anything
        // using the return value of the call with the computed value.
        if (!TheCall->use_empty()) {
            if (TheCall == Returns[0]->getReturnValue())

        // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
        BasicBlock *ReturnBB = Returns[0]->getParent();

        // Splice the code from the return block into the block that it will return
        // to, which contains the code that was after the call.

        if (CreatedBranchToNormalDest)

        // Delete the return instruction now and empty ReturnBB now.
    } else if (!TheCall->use_empty()) {
        // No returns, but something is using the return value of the call.  Just
        // nuke the result.

    // Since we are now done with the Call/Invoke, we can delete it.

    // We should always be able to fold the entry block of the function into the
    // single predecessor of the block...
    assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
    BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

    // Splice the code entry block into calling block, right before the
    // unconditional branch.
    CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
    OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

    // Remove the unconditional branch.

    // Now we can remove the CalleeEntry block, which is now empty.

    // If we inserted a phi node, check to see if it has a single value (e.g. all
    // the entries are the same or undef).  If so, remove the PHI so it doesn't
    // block other optimizations.
    if (PHI) {
        if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {

    return true;
Example #11
std::string Triple::normalize(StringRef Str) {
  // Parse into components.
  SmallVector<StringRef, 4> Components;
  Str.split(Components, "-");

  // If the first component corresponds to a known architecture, preferentially
  // use it for the architecture.  If the second component corresponds to a
  // known vendor, preferentially use it for the vendor, etc.  This avoids silly
  // component movement when a component parses as (eg) both a valid arch and a
  // valid os.
  ArchType Arch = UnknownArch;
  if (Components.size() > 0)
    Arch = parseArch(Components[0]);
  VendorType Vendor = UnknownVendor;
  if (Components.size() > 1)
    Vendor = parseVendor(Components[1]);
  OSType OS = UnknownOS;
  if (Components.size() > 2)
    OS = parseOS(Components[2]);
  EnvironmentType Environment = UnknownEnvironment;
  if (Components.size() > 3)
    Environment = parseEnvironment(Components[3]);
  ObjectFormatType ObjectFormat = UnknownObjectFormat;

  // Note which components are already in their final position.  These will not
  // be moved.
  bool Found[4];
  Found[0] = Arch != UnknownArch;
  Found[1] = Vendor != UnknownVendor;
  Found[2] = OS != UnknownOS;
  Found[3] = Environment != UnknownEnvironment;

  // If they are not there already, permute the components into their canonical
  // positions by seeing if they parse as a valid architecture, and if so moving
  // the component to the architecture position etc.
  for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
    if (Found[Pos])
      continue; // Already in the canonical position.

    for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
      // Do not reparse any components that already matched.
      if (Idx < array_lengthof(Found) && Found[Idx])

      // Does this component parse as valid for the target position?
      bool Valid = false;
      StringRef Comp = Components[Idx];
      switch (Pos) {
      default: llvm_unreachable("unexpected component type!");
      case 0:
        Arch = parseArch(Comp);
        Valid = Arch != UnknownArch;
      case 1:
        Vendor = parseVendor(Comp);
        Valid = Vendor != UnknownVendor;
      case 2:
        OS = parseOS(Comp);
        Valid = OS != UnknownOS;
      case 3:
        Environment = parseEnvironment(Comp);
        Valid = Environment != UnknownEnvironment;
        if (!Valid) {
          ObjectFormat = parseFormat(Comp);
          Valid = ObjectFormat != UnknownObjectFormat;
      if (!Valid)
        continue; // Nope, try the next component.

      // Move the component to the target position, pushing any non-fixed
      // components that are in the way to the right.  This tends to give
      // good results in the common cases of a forgotten vendor component
      // or a wrongly positioned environment.
      if (Pos < Idx) {
        // Insert left, pushing the existing components to the right.  For
        // example, a-b-i386 -> i386-a-b when moving i386 to the front.
        StringRef CurrentComponent(""); // The empty component.
        // Replace the component we are moving with an empty component.
        std::swap(CurrentComponent, Components[Idx]);
        // Insert the component being moved at Pos, displacing any existing
        // components to the right.
        for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
          // Skip over any fixed components.
          while (i < array_lengthof(Found) && Found[i])
          // Place the component at the new position, getting the component
          // that was at this position - it will be moved right.
          std::swap(CurrentComponent, Components[i]);
      } else if (Pos > Idx) {
        // Push right by inserting empty components until the component at Idx
        // reaches the target position Pos.  For example, pc-a -> -pc-a when
        // moving pc to the second position.
        do {
          // Insert one empty component at Idx.
          StringRef CurrentComponent(""); // The empty component.
          for (unsigned i = Idx; i < Components.size();) {
            // Place the component at the new position, getting the component
            // that was at this position - it will be moved right.
            std::swap(CurrentComponent, Components[i]);
            // If it was placed on top of an empty component then we are done.
            if (CurrentComponent.empty())
            // Advance to the next component, skipping any fixed components.
            while (++i < array_lengthof(Found) && Found[i])
          // The last component was pushed off the end - append it.
          if (!CurrentComponent.empty())

          // Advance Idx to the component's new position.
          while (++Idx < array_lengthof(Found) && Found[Idx])
        } while (Idx < Pos); // Add more until the final position is reached.
      assert(Pos < Components.size() && Components[Pos] == Comp &&
             "Component moved wrong!");
      Found[Pos] = true;

  // Special case logic goes here.  At this point Arch, Vendor and OS have the
  // correct values for the computed components.

  // Stick the corrected components back together to form the normalized string.
  std::string Normalized;
  for (unsigned i = 0, e = Components.size(); i != e; ++i) {
    if (i) Normalized += '-';
    Normalized += Components[i];
  return Normalized;
Example #12
/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
/// newline terminated strings to the specified string if desired.  This
/// information is shown in disassembly dumps when verbose assembly is enabled.
void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
                                  const char *(*getRegName)(unsigned)) {
  // If this is a shuffle operation, the switch should fill in this state.
  SmallVector<int, 8> ShuffleMask;
  const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;

  switch (MI->getOpcode()) {
  case X86::INSERTPSrr:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    Src2Name = getRegName(MI->getOperand(2).getReg());
    DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
  case X86::VINSERTPSrr:
    DestName = getRegName(MI->getOperand(0).getReg());
    Src1Name = getRegName(MI->getOperand(1).getReg());
    Src2Name = getRegName(MI->getOperand(2).getReg());
    DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);

  case X86::MOVLHPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeMOVLHPSMask(2, ShuffleMask);
  case X86::VMOVLHPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeMOVLHPSMask(2, ShuffleMask);

  case X86::MOVHLPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeMOVHLPSMask(2, ShuffleMask);
  case X86::VMOVHLPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeMOVHLPSMask(2, ShuffleMask);

  case X86::PSHUFDri:
  case X86::VPSHUFDri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::PSHUFDmi:
  case X86::VPSHUFDmi:
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodePSHUFMask(MVT::v4i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
  case X86::VPSHUFDYri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPSHUFDYmi:
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodePSHUFMask(MVT::v8i32, MI->getOperand(MI->getNumOperands()-1).getImm(),

  case X86::PSHUFHWri:
  case X86::VPSHUFHWri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::PSHUFHWmi:
  case X86::VPSHUFHWmi:
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPSHUFHWYri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPSHUFHWYmi:
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::PSHUFLWri:
  case X86::VPSHUFLWri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::PSHUFLWmi:
  case X86::VPSHUFLWmi:
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPSHUFLWYri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPSHUFLWYmi:
    DestName = getRegName(MI->getOperand(0).getReg());

  case X86::PUNPCKHBWrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKHBWrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
  case X86::VPUNPCKHBWrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHBWrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
  case X86::VPUNPCKHBWYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHBWYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
  case X86::PUNPCKHWDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKHWDrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
  case X86::VPUNPCKHWDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHWDrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
  case X86::VPUNPCKHWDYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHWDYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
  case X86::PUNPCKHDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKHDQrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
  case X86::VPUNPCKHDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHDQrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
  case X86::VPUNPCKHDQYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHDQYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
  case X86::PUNPCKHQDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKHQDQrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
  case X86::VPUNPCKHQDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHQDQrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
  case X86::VPUNPCKHQDQYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKHQDQYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);

  case X86::PUNPCKLBWrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKLBWrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
  case X86::VPUNPCKLBWrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLBWrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
  case X86::VPUNPCKLBWYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLBWYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
  case X86::PUNPCKLWDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKLWDrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
  case X86::VPUNPCKLWDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLWDrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
  case X86::VPUNPCKLWDYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLWDYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
  case X86::PUNPCKLDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKLDQrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
  case X86::VPUNPCKLDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLDQrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
  case X86::VPUNPCKLDQYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLDQYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
  case X86::PUNPCKLQDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::PUNPCKLQDQrm:
    Src1Name = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
  case X86::VPUNPCKLQDQrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLQDQrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
  case X86::VPUNPCKLQDQYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPUNPCKLQDQYrm:
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
    DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);

  case X86::SHUFPDrri:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::SHUFPDrmi:
    DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
    Src1Name = getRegName(MI->getOperand(0).getReg());
  case X86::VSHUFPDrri:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VSHUFPDrmi:
    DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VSHUFPDYrri:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VSHUFPDYrmi:
    DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());

  case X86::SHUFPSrri:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::SHUFPSrmi:
    DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
    Src1Name = getRegName(MI->getOperand(0).getReg());
  case X86::VSHUFPSrri:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VSHUFPSrmi:
    DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VSHUFPSYrri:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VSHUFPSYrmi:
    DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());

  case X86::UNPCKLPDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::UNPCKLPDrm:
    DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKLPDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKLPDrm:
    DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKLPDYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKLPDYrm:
    DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::UNPCKLPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::UNPCKLPSrm:
    DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKLPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKLPSrm:
    DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKLPSYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKLPSYrm:
    DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::UNPCKHPDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::UNPCKHPDrm:
    DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKHPDrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKHPDrm:
    DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKHPDYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKHPDYrm:
    DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::UNPCKHPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::UNPCKHPSrm:
    DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKHPSrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKHPSrm:
    DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VUNPCKHPSYrr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VUNPCKHPSYrm:
    DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPERMILPSri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPERMILPSmi:
    DecodePSHUFMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPERMILPSYri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPERMILPSYmi:
    DecodePSHUFMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPERMILPDri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPERMILPDmi:
    DecodePSHUFMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPERMILPDYri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPERMILPDYmi:
    DecodePSHUFMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPERM2F128rr:
  case X86::VPERM2I128rr:
    Src2Name = getRegName(MI->getOperand(2).getReg());
  case X86::VPERM2F128rm:
  case X86::VPERM2I128rm:
    // For instruction comments purpose, assume the 256-bit vector is v4i64.
    Src1Name = getRegName(MI->getOperand(1).getReg());
    DestName = getRegName(MI->getOperand(0).getReg());
  case X86::VPERMQYri:
  case X86::VPERMPDYri:
    Src1Name = getRegName(MI->getOperand(1).getReg());
  case X86::VPERMQYmi:
  case X86::VPERMPDYmi:
    DestName = getRegName(MI->getOperand(0).getReg());

  // If this was a shuffle operation, print the shuffle mask.
  if (!ShuffleMask.empty()) {
    if (DestName == 0) DestName = Src1Name;
    OS << (DestName ? DestName : "mem") << " = ";

    // If the two sources are the same, canonicalize the input elements to be
    // from the first src so that we get larger element spans.
    if (Src1Name == Src2Name) {
      for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
        if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
            ShuffleMask[i] >= (int)e)        // From second mask.
          ShuffleMask[i] -= e;

    // The shuffle mask specifies which elements of the src1/src2 fill in the
    // destination, with a few sentinel values.  Loop through and print them
    // out.
    for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
      if (i != 0)
        OS << ',';
      if (ShuffleMask[i] == SM_SentinelZero) {
        OS << "zero";

      // Otherwise, it must come from src1 or src2.  Print the span of elements
      // that comes from this src.
      bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
      const char *SrcName = isSrc1 ? Src1Name : Src2Name;
      OS << (SrcName ? SrcName : "mem") << '[';
      bool IsFirst = true;
      while (i != e &&
             (int)ShuffleMask[i] >= 0 &&
             (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
        if (!IsFirst)
          OS << ',';
          IsFirst = false;
        OS << ShuffleMask[i] % ShuffleMask.size();
      OS << ']';
      --i;  // For loop increments element #.
    //MI->print(OS, 0);
    OS << "\n";

Example #13
/// Match - Match the pattern string against the input buffer Buffer.  This
/// returns the position that is matched or npos if there is no match.  If
/// there is a match, the size of the matched string is returned in MatchLen.
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
                      StringMap<StringRef> &VariableTable) const {
  // If this is the EOF pattern, match it immediately.
  if (CheckTy == Check::CheckEOF) {
    MatchLen = 0;
    return Buffer.size();

  // If this is a fixed string pattern, just match it now.
  if (!FixedStr.empty()) {
    MatchLen = FixedStr.size();
    return Buffer.find(FixedStr);

  // Regex match.

  // If there are variable uses, we need to create a temporary string with the
  // actual value.
  StringRef RegExToMatch = RegExStr;
  std::string TmpStr;
  if (!VariableUses.empty()) {
    TmpStr = RegExStr;

    unsigned InsertOffset = 0;
    for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
      std::string Value;

      if (VariableUses[i].first[0] == '@') {
        if (!EvaluateExpression(VariableUses[i].first, Value))
          return StringRef::npos;
      } else {
        StringMap<StringRef>::iterator it =
        // If the variable is undefined, return an error.
        if (it == VariableTable.end())
          return StringRef::npos;

        // Look up the value and escape it so that we can put it into the regex.
        Value += Regex::escape(it->second);

      // Plop it into the regex at the adjusted offset.
                    Value.begin(), Value.end());
      InsertOffset += Value.size();

    // Match the newly constructed regex.
    RegExToMatch = TmpStr;

  SmallVector<StringRef, 4> MatchInfo;
  if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    return StringRef::npos;

  // Successful regex match.
  assert(!MatchInfo.empty() && "Didn't get any match");
  StringRef FullMatch = MatchInfo[0];

  // If this defines any variables, remember their values.
  for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
                                                     E = VariableDefs.end();
       I != E; ++I) {
    assert(I->second < MatchInfo.size() && "Internal paren error");
    VariableTable[I->first] = MatchInfo[I->second];

  MatchLen = FullMatch.size();
  return FullMatch.data()-Buffer.data();
Example #14
// main for opt
int main(int argc, char **argv) {
  llvm::PrettyStackTraceProgram X(argc, argv);

  // Enable debug stream buffering.
  EnableDebugBuffering = true;

  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
  LLVMContext Context;


  // Initialize passes
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  // For codegen passes, only passes that do IR to IR transformation are
  // supported.


  cl::ParseCommandLineOptions(argc, argv,
    "llvm .bc -> .bc modular optimizer and analysis printer\n");

  if (AnalyzeOnly && NoOutput) {
    errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
    return 1;

  SMDiagnostic Err;

  if (!DisableDITypeMap)

  if (PassRemarksWithHotness)

  if (PassRemarksHotnessThreshold)

  std::unique_ptr<ToolOutputFile> OptRemarkFile;
  if (RemarksFilename != "") {
    std::error_code EC;
    OptRemarkFile =
        llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
    if (EC) {
      errs() << EC.message() << '\n';
      return 1;

  // Load the input module...
  std::unique_ptr<Module> M =
      parseIRFile(InputFilename, Err, Context, !NoVerify, ClDataLayout);

  if (!M) {
    Err.print(argv[0], errs());
    return 1;

  // Strip debug info before running the verifier.
  if (StripDebug)

  // If we are supposed to override the target triple or data layout, do so now.
  if (!TargetTriple.empty())

  // Immediately run the verifier to catch any problems before starting up the
  // pass pipelines.  Otherwise we can crash on broken code during
  // doInitialization().
  if (!NoVerify && verifyModule(*M, &errs())) {
    errs() << argv[0] << ": " << InputFilename
           << ": error: input module is broken!\n";
    return 1;

  // Figure out what stream we are supposed to write to...
  std::unique_ptr<ToolOutputFile> Out;
  std::unique_ptr<ToolOutputFile> ThinLinkOut;
  if (NoOutput) {
    if (!OutputFilename.empty())
      errs() << "WARNING: The -o (output filename) option is ignored when\n"
                "the --disable-output option is used.\n";
  } else {
    // Default to standard output.
    if (OutputFilename.empty())
      OutputFilename = "-";

    std::error_code EC;
    Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None));
    if (EC) {
      errs() << EC.message() << '\n';
      return 1;

    if (!ThinLinkBitcodeFile.empty()) {
          new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::F_None));
      if (EC) {
        errs() << EC.message() << '\n';
        return 1;

  Triple ModuleTriple(M->getTargetTriple());
  std::string CPUStr, FeaturesStr;
  TargetMachine *Machine = nullptr;
  const TargetOptions Options = InitTargetOptionsFromCodeGenFlags();

  if (ModuleTriple.getArch()) {
    CPUStr = getCPUStr();
    FeaturesStr = getFeaturesStr();
    Machine = GetTargetMachine(ModuleTriple, CPUStr, FeaturesStr, Options);

  std::unique_ptr<TargetMachine> TM(Machine);

  // Override function attributes based on CPUStr, FeaturesStr, and command line
  // flags.
  setFunctionAttributes(CPUStr, FeaturesStr, *M);

  // If the output is set to be emitted to standard out, and standard out is a
  // console, print out a warning message and refuse to do it.  We don't
  // impress anyone by spewing tons of binary goo to a terminal.
  if (!Force && !NoOutput && !AnalyzeOnly && !OutputAssembly)
    if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
      NoOutput = true;

  if (PassPipeline.getNumOccurrences() > 0) {
    OutputKind OK = OK_NoOutput;
    if (!NoOutput)
      OK = OutputAssembly
               ? OK_OutputAssembly
               : (OutputThinLTOBC ? OK_OutputThinLTOBitcode : OK_OutputBitcode);

    VerifierKind VK = VK_VerifyInAndOut;
    if (NoVerify)
      VK = VK_NoVerifier;
    else if (VerifyEach)
      VK = VK_VerifyEachPass;

    // The user has asked to use the new pass manager and provided a pipeline
    // string. Hand off the rest of the functionality to the new code for that
    // layer.
    return runPassPipeline(argv[0], *M, TM.get(), Out.get(), ThinLinkOut.get(),
                           OptRemarkFile.get(), PassPipeline, OK, VK,
                           PreserveBitcodeUseListOrder, EmitSummaryIndex,
                           EmitModuleHash, EnableDebugify)
               ? 0
               : 1;

  // Create a PassManager to hold and optimize the collection of passes we are
  // about to build.
  legacy::PassManager Passes;

  // Add an appropriate TargetLibraryInfo pass for the module's triple.
  TargetLibraryInfoImpl TLII(ModuleTriple);

  // The -disable-simplify-libcalls flag actually disables all builtin optzns.
  if (DisableSimplifyLibCalls)
  Passes.add(new TargetLibraryInfoWrapperPass(TLII));

  // Add internal analysis passes from the target machine.
  Passes.add(createTargetTransformInfoWrapperPass(TM ? TM->getTargetIRAnalysis()
                                                     : TargetIRAnalysis()));

  if (EnableDebugify)

  std::unique_ptr<legacy::FunctionPassManager> FPasses;
  if (OptLevelO0 || OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz ||
      OptLevelO3) {
    FPasses.reset(new legacy::FunctionPassManager(M.get()));
        TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()));

  if (PrintBreakpoints) {
    // Default to standard output.
    if (!Out) {
      if (OutputFilename.empty())
        OutputFilename = "-";

      std::error_code EC;
      Out = llvm::make_unique<ToolOutputFile>(OutputFilename, EC,
      if (EC) {
        errs() << EC.message() << '\n';
        return 1;
    NoOutput = true;

  if (TM) {
    // FIXME: We should dyn_cast this when supported.
    auto &LTM = static_cast<LLVMTargetMachine &>(*TM);
    Pass *TPC = LTM.createPassConfig(Passes);

  // Create a new optimization pass for each one specified on the command line
  for (unsigned i = 0; i < PassList.size(); ++i) {
    if (StandardLinkOpts &&
        StandardLinkOpts.getPosition() < PassList.getPosition(i)) {
      StandardLinkOpts = false;

    if (OptLevelO0 && OptLevelO0.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, TM.get(), 0, 0);
      OptLevelO0 = false;

    if (OptLevelO1 && OptLevelO1.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, TM.get(), 1, 0);
      OptLevelO1 = false;

    if (OptLevelO2 && OptLevelO2.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, TM.get(), 2, 0);
      OptLevelO2 = false;

    if (OptLevelOs && OptLevelOs.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, TM.get(), 2, 1);
      OptLevelOs = false;

    if (OptLevelOz && OptLevelOz.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, TM.get(), 2, 2);
      OptLevelOz = false;

    if (OptLevelO3 && OptLevelO3.getPosition() < PassList.getPosition(i)) {
      AddOptimizationPasses(Passes, *FPasses, TM.get(), 3, 0);
      OptLevelO3 = false;

    const PassInfo *PassInf = PassList[i];
    Pass *P = nullptr;
    if (PassInf->getNormalCtor())
      P = PassInf->getNormalCtor()();
      errs() << argv[0] << ": cannot create pass: "
             << PassInf->getPassName() << "\n";
    if (P) {
      PassKind Kind = P->getPassKind();
      addPass(Passes, P);

      if (AnalyzeOnly) {
        switch (Kind) {
        case PT_BasicBlock:
          Passes.add(createBasicBlockPassPrinter(PassInf, Out->os(), Quiet));
        case PT_Region:
          Passes.add(createRegionPassPrinter(PassInf, Out->os(), Quiet));
        case PT_Loop:
          Passes.add(createLoopPassPrinter(PassInf, Out->os(), Quiet));
        case PT_Function:
          Passes.add(createFunctionPassPrinter(PassInf, Out->os(), Quiet));
        case PT_CallGraphSCC:
          Passes.add(createCallGraphPassPrinter(PassInf, Out->os(), Quiet));
          Passes.add(createModulePassPrinter(PassInf, Out->os(), Quiet));

    if (PrintEachXForm)
          createPrintModulePass(errs(), "", PreserveAssemblyUseListOrder));

  if (StandardLinkOpts) {
    StandardLinkOpts = false;

  if (OptLevelO0)
    AddOptimizationPasses(Passes, *FPasses, TM.get(), 0, 0);

  if (OptLevelO1)
    AddOptimizationPasses(Passes, *FPasses, TM.get(), 1, 0);

  if (OptLevelO2)
    AddOptimizationPasses(Passes, *FPasses, TM.get(), 2, 0);

  if (OptLevelOs)
    AddOptimizationPasses(Passes, *FPasses, TM.get(), 2, 1);

  if (OptLevelOz)
    AddOptimizationPasses(Passes, *FPasses, TM.get(), 2, 2);

  if (OptLevelO3)
    AddOptimizationPasses(Passes, *FPasses, TM.get(), 3, 0);

  if (FPasses) {
    for (Function &F : *M)

  // Check that the module is well formed on completion of optimization
  if (!NoVerify && !VerifyEach)

  if (EnableDebugify)

  // In run twice mode, we want to make sure the output is bit-by-bit
  // equivalent if we run the pass manager again, so setup two buffers and
  // a stream to write to them. Note that llc does something similar and it
  // may be worth to abstract this out in the future.
  SmallVector<char, 0> Buffer;
  SmallVector<char, 0> CompileTwiceBuffer;
  std::unique_ptr<raw_svector_ostream> BOS;
  raw_ostream *OS = nullptr;

  // Write bitcode or assembly to the output as the last step...
  if (!NoOutput && !AnalyzeOnly) {
    OS = &Out->os();
    if (RunTwice) {
      BOS = make_unique<raw_svector_ostream>(Buffer);
      OS = BOS.get();
    if (OutputAssembly) {
      if (EmitSummaryIndex)
        report_fatal_error("Text output is incompatible with -module-summary");
      if (EmitModuleHash)
        report_fatal_error("Text output is incompatible with -module-hash");
      Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder));
    } else if (OutputThinLTOBC)
          *OS, ThinLinkOut ? &ThinLinkOut->os() : nullptr));
      Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder,
                                         EmitSummaryIndex, EmitModuleHash));

  // Before executing passes, print the final values of the LLVM options.

  // If requested, run all passes again with the same pass manager to catch
  // bugs caused by persistent state in the passes
  if (RunTwice) {
    std::unique_ptr<Module> M2(CloneModule(*M));
    CompileTwiceBuffer = Buffer;

  // Now that we have all of the passes ready, run them.

  // Compare the two outputs and make sure they're the same
  if (RunTwice) {
    if (Buffer.size() != CompileTwiceBuffer.size() ||
        (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) !=
         0)) {
      errs() << "Running the pass manager twice changed the output.\n"
                "Writing the result of the second run to the specified output.\n"
                "To generate the one-run comparison binary, just run without\n"
                "the compile-twice option\n";
      Out->os() << BOS->str();
      if (OptRemarkFile)
      return 1;
    Out->os() << BOS->str();

  // Declare success.
  if (!NoOutput || PrintBreakpoints)

  if (OptRemarkFile)

  if (ThinLinkOut)

  return 0;
Example #15
/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
/// all loops have preheaders.
bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
  bool Changed = false;

  // Check to see that no blocks (other than the header) in this loop have
  // predecessors that are not in the loop.  This is not valid for natural
  // loops, but can occur if the blocks are unreachable.  Since they are
  // unreachable we can just shamelessly delete those CFG edges!
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
       BB != E; ++BB) {
    if (*BB == L->getHeader()) continue;

    SmallPtrSet<BasicBlock*, 4> BadPreds;
    for (pred_iterator PI = pred_begin(*BB),
         PE = pred_end(*BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (!L->contains(P))

    // Delete each unique out-of-loop (and thus dead) predecessor.
    for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
         E = BadPreds.end(); I != E; ++I) {

      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
                   << (*I)->getName() << "\n");

      // Inform each successor of each dead pred.
      for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
      // Zap the dead pred's terminator and replace it with unreachable.
      TerminatorInst *TI = (*I)->getTerminator();
      new UnreachableInst((*I)->getContext(), *I);
      Changed = true;

  // If there are exiting blocks with branches on undef, resolve the undef in
  // the direction which will exit the loop. This will help simplify loop
  // trip count computations.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       E = ExitingBlocks.end(); I != E; ++I)
    if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
      if (BI->isConditional()) {
        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {

          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
                       << (*I)->getName() << "\n");


          // This may make the loop analyzable, force SCEV recomputation.
          if (SE)

          Changed = true;

  // Does the loop already have a preheader?  If so, don't insert one.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    Preheader = InsertPreheaderForLoop(L);
    if (Preheader) {
      Changed = true;

  // Next, check to make sure that all exit nodes of the loop only have
  // predecessors that are inside of the loop.  This check guarantees that the
  // loop preheader/header will dominate the exit blocks.  If the exit block has
  // predecessors from outside of the loop, split the edge now.
  SmallVector<BasicBlock*, 8> ExitBlocks;

  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
  for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
         E = ExitBlockSet.end(); I != E; ++I) {
    BasicBlock *ExitBlock = *I;
    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
         PI != PE; ++PI)
      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
      // allowed.
      if (!L->contains(*PI)) {
        if (RewriteLoopExitBlock(L, ExitBlock)) {
          Changed = true;

  // If the header has more than two predecessors at this point (from the
  // preheader and from multiple backedges), we must adjust the loop.
  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    // If this is really a nested loop, rip it out into a child loop.  Don't do
    // this for loops with a giant number of backedges, just factor them into a
    // common backedge instead.
    if (L->getNumBackEdges() < 8) {
      if (SeparateNestedLoop(L, LPM, Preheader)) {
        // This is a big restructuring change, reprocess the whole loop.
        Changed = true;
        // GCC doesn't tail recursion eliminate this.
        goto ReprocessLoop;

    // If we either couldn't, or didn't want to, identify nesting of the loops,
    // insert a new block that all backedges target, then make it jump to the
    // loop header.
    LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
    if (LoopLatch) {
      Changed = true;

  // Scan over the PHI nodes in the loop header.  Since they now have only two
  // incoming values (the loop is canonicalized), we may have simplified the PHI
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
  PHINode *PN;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       (PN = dyn_cast<PHINode>(I++)); )
    if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
      if (AA) AA->deleteValue(PN);
      if (SE) SE->forgetValue(PN);

  // If this loop has multiple exits and the exits all go to the same
  // block, attempt to merge the exits. This helps several passes, such
  // as LoopRotation, which do not support loops with multiple exits.
  // SimplifyCFG also does this (and this code uses the same utility
  // function), however this code is loop-aware, where SimplifyCFG is
  // not. That gives it the advantage of being able to hoist
  // loop-invariant instructions out of the way to open up more
  // opportunities, and the disadvantage of having the responsibility
  // to preserve dominator information.
  bool UniqueExit = true;
  if (!ExitBlocks.empty())
    for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i] != ExitBlocks[0]) {
        UniqueExit = false;
  if (UniqueExit) {
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitingBlock = ExitingBlocks[i];
      if (!ExitingBlock->getSinglePredecessor()) continue;
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
      if (!BI || !BI->isConditional()) continue;
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI || CI->getParent() != ExitingBlock) continue;

      // Attempt to hoist out all instructions except for the
      // comparison and the branch.
      bool AllInvariant = true;
      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
        Instruction *Inst = I++;
        // Skip debug info intrinsics.
        if (isa<DbgInfoIntrinsic>(Inst))
        if (Inst == CI)
        if (!L->makeLoopInvariant(Inst, Changed,
                                  Preheader ? Preheader->getTerminator() : 0)) {
          AllInvariant = false;
      if (!AllInvariant) continue;

      // The block has now been cleared of all instructions except for
      // a comparison and a conditional branch. SimplifyCFG may be able
      // to fold it now.
      if (!FoldBranchToCommonDest(BI)) continue;

      // Success. The block is now dead, so remove it from the loop,
      // update the dominator tree and delete it.
      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                   << ExitingBlock->getName() << "\n");

      // If any reachable control flow within this loop has changed, notify
      // ScalarEvolution. Currently assume the parent loop doesn't change
      // (spliting edges doesn't count). If blocks, CFG edges, or other values
      // in the parent loop change, then we need call to forgetLoop() for the
      // parent instead.
      if (SE)

      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
      Changed = true;

      DomTreeNode *Node = DT->getNode(ExitingBlock);
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
      while (!Children.empty()) {
        DomTreeNode *Child = Children.front();
        DT->changeImmediateDominator(Child, Node->getIDom());


  return Changed;
Example #16
void StackAllocationPromoter::promoteAllocationToPhi() {
  LLVM_DEBUG(llvm::dbgs() << "*** Placing Phis for : " << *ASI);

  // A list of blocks that will require new Phi values.
  BlockSet PhiBlocks;

  // The "piggy-bank" data-structure that we use for processing the dom-tree
  // bottom-up.
  NodePriorityQueue PQ;

  // Collect all of the stores into the AllocStack. We know that at this point
  // we have at most one store per block.
  for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E; ++UI) {
    SILInstruction *II = UI->getUser();
    // We need to place Phis for this block.
    if (isa<StoreInst>(II)) {
      // If the block is in the dom tree (dominated by the entry block).
      if (DomTreeNode *Node = DT->getNode(II->getParent()))
        PQ.push(std::make_pair(Node, DomTreeLevels[Node]));

  LLVM_DEBUG(llvm::dbgs() << "*** Found: " << PQ.size() << " Defs\n");

  // A list of nodes for which we already calculated the dominator frontier.
  llvm::SmallPtrSet<DomTreeNode *, 32> Visited;

  SmallVector<DomTreeNode *, 32> Worklist;

  // Scan all of the definitions in the function bottom-up using the priority
  // queue.
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dom tree children of Root, inspecting their successors. Only
    // J-edges, whose target level is at most Root's level are added to the
    // dominance frontier.

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      SILBasicBlock *BB = Node->getBlock();

      // For all successors of the node:
      for (auto &Succ : BB->getSuccessors()) {
        DomTreeNode *SuccNode = DT->getNode(Succ);

        // Skip D-edges (edges that are dom-tree edges).
        if (SuccNode->getIDom() == Node)

        // Ignore J-edges that point to nodes that are not smaller or equal
        // to the root level.
        unsigned SuccLevel = DomTreeLevels[SuccNode];
        if (SuccLevel > RootLevel)

        // Ignore visited nodes.
        if (!Visited.insert(SuccNode).second)

        // If the new PHInode is not dominated by the allocation then it's dead.
        if (!DT->dominates(ASI->getParent(), SuccNode->getBlock()))

        // If the new PHInode is properly dominated by the deallocation then it
        // is obviously a dead PHInode, so we don't need to insert it.
        if (DSI && DT->properlyDominates(DSI->getParent(),

        // The successor node is a new PHINode. If this is a new PHI node
        // then it may require additional definitions, so add it to the PQ.
        if (PhiBlocks.insert(Succ))
          PQ.push(std::make_pair(SuccNode, SuccLevel));

      // Add the children in the dom-tree to the worklist.
      for (auto CI = Node->begin(), CE = Node->end(); CI != CE; ++CI)
        if (!Visited.count(*CI))

  LLVM_DEBUG(llvm::dbgs() << "*** Found: " << PhiBlocks.size() <<" new PHIs\n");
  NumPhiPlaced += PhiBlocks.size();

  // At this point we calculated the locations of all of the new Phi values.
  // Next, add the Phi values and promote all of the loads and stores into the
  // new locations.

  // Replace the dummy values with new block arguments.

  // Hook up the Phi nodes, loads, and debug_value_addr with incoming values.

  LLVM_DEBUG(llvm::dbgs() << "*** Finished placing Phis ***\n");
Example #17
/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
/// them out into a nested loop.  This is important for code that looks like
/// this:
///  Loop:
///     ...
///     br cond, Loop, Next
///     ...
///     br cond2, Loop, Out
/// To identify this common case, we look at the PHI nodes in the header of the
/// loop.  PHI nodes with unchanging values on one backedge correspond to values
/// that change in the "outer" loop, but not in the "inner" loop.
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
                                       BasicBlock *Preheader) {
  // Don't try to separate loops without a preheader.
  if (!Preheader)
    return 0;

  // The header is not a landing pad; preheader insertion should ensure this.
  assert(!L->getHeader()->isLandingPad() &&
         "Can't insert backedge to landing pad");

  PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
  if (PN == 0) return 0;  // No known way to partition.

  // Pull out all predecessors that have varying values in the loop.  This
  // handles the case when a PHI node has multiple instances of itself as
  // arguments.
  SmallVector<BasicBlock*, 8> OuterLoopPreds;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingValue(i) != PN ||
        !L->contains(PN->getIncomingBlock(i))) {
      // We can't split indirectbr edges.
      if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
        return 0;
  DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");

  // If ScalarEvolution is around and knows anything about values in
  // this loop, tell it to forget them, because we're about to
  // substantially change it.
  if (SE)

  BasicBlock *Header = L->getHeader();
  BasicBlock *NewBB =
    SplitBlockPredecessors(Header, OuterLoopPreds,  ".outer", this);

  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
  // code layout too horribly.
  PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);

  // Create the new outer loop.
  Loop *NewOuter = new Loop();

  // Change the parent loop to use the outer loop as its child now.
  if (Loop *Parent = L->getParentLoop())
    Parent->replaceChildLoopWith(L, NewOuter);
    LI->changeTopLevelLoop(L, NewOuter);

  // L is now a subloop of our outer loop.

  // Add the new loop to the pass manager queue.

  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I)

  // Now reset the header in L, which had been moved by
  // SplitBlockPredecessors for the outer loop.

  // Determine which blocks should stay in L and which should be moved out to
  // the Outer loop now.
  std::set<BasicBlock*> BlocksInL;
  for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
    BasicBlock *P = *PI;
    if (DT->dominates(Header, P))
      AddBlockAndPredsToSet(P, Header, BlocksInL);

  // Scan all of the loop children of L, moving them to OuterLoop if they are
  // not part of the inner loop.
  const std::vector<Loop*> &SubLoops = L->getSubLoops();
  for (size_t I = 0; I != SubLoops.size(); )
    if (BlocksInL.count(SubLoops[I]->getHeader()))
      ++I;   // Loop remains in L
      NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));

  // Now that we know which blocks are in L and which need to be moved to
  // OuterLoop, move any blocks that need it.
  for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
    BasicBlock *BB = L->getBlocks()[i];
    if (!BlocksInL.count(BB)) {
      // Move this block to the parent, updating the exit blocks sets
      if ((*LI)[BB] == L)
        LI->changeLoopFor(BB, NewOuter);

  return NewOuter;
Example #18
Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
                                         BasicBlock *PredBB,
                                         const DominatorTree *DT) {
  // If this is a non-instruction value, it can't require PHI translation.
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (Inst == 0) return V;

  // Determine whether 'Inst' is an input to our PHI translatable expression.
  bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);

  // Handle inputs instructions if needed.
  if (isInput) {
    if (Inst->getParent() != CurBB) {
      // If it is an input defined in a different block, then it remains an
      // input.
      return Inst;

    // If 'Inst' is defined in this block and is an input that needs to be phi
    // translated, we need to incorporate the value into the expression or fail.

    // In either case, the instruction itself isn't an input any longer.
    InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));

    // If this is a PHI, go ahead and translate it.
    if (PHINode *PN = dyn_cast<PHINode>(Inst))
      return AddAsInput(PN->getIncomingValueForBlock(PredBB));

    // If this is a non-phi value, and it is analyzable, we can incorporate it
    // into the expression by making all instruction operands be inputs.
    if (!CanPHITrans(Inst))
      return 0;

    // All instruction operands are now inputs (and of course, they may also be
    // defined in this block, so they may need to be phi translated themselves.
    for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
      if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))

  // Ok, it must be an intermediate result (either because it started that way
  // or because we just incorporated it into the expression).  See if its
  // operands need to be phi translated, and if so, reconstruct it.

  if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
    if (!isSafeToSpeculativelyExecute(Cast)) return 0;
    Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
    if (PHIIn == 0) return 0;
    if (PHIIn == Cast->getOperand(0))
      return Cast;

    // Find an available version of this cast.

    // Constants are trivial to find.
    if (Constant *C = dyn_cast<Constant>(PHIIn))
      return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
                                              C, Cast->getType()));

    // Otherwise we have to see if a casted version of the incoming pointer
    // is available.  If so, we can use it, otherwise we have to fail.
    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
         UI != E; ++UI) {
      if (CastInst *CastI = dyn_cast<CastInst>(*UI))
        if (CastI->getOpcode() == Cast->getOpcode() &&
            CastI->getType() == Cast->getType() &&
            (!DT || DT->dominates(CastI->getParent(), PredBB)))
          return CastI;
    return 0;

  // Handle getelementptr with at least one PHI translatable operand.
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
    SmallVector<Value*, 8> GEPOps;
    bool AnyChanged = false;
    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
      Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
      if (GEPOp == 0) return 0;

      AnyChanged |= GEPOp != GEP->getOperand(i);

    if (!AnyChanged)
      return GEP;

    // Simplify the GEP to handle 'gep x, 0' -> x etc.
    if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT)) {
      for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
        RemoveInstInputs(GEPOps[i], InstInputs);

      return AddAsInput(V);

    // Scan to see if we have this GEP available.
    Value *APHIOp = GEPOps[0];
    for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
         UI != E; ++UI) {
      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
        if (GEPI->getType() == GEP->getType() &&
            GEPI->getNumOperands() == GEPOps.size() &&
            GEPI->getParent()->getParent() == CurBB->getParent() &&
            (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
          bool Mismatch = false;
          for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
            if (GEPI->getOperand(i) != GEPOps[i]) {
              Mismatch = true;
          if (!Mismatch)
            return GEPI;
    return 0;

  // Handle add with a constant RHS.
  if (Inst->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(Inst->getOperand(1))) {
    // PHI translate the LHS.
    Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
    bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
    bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();

    Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
    if (LHS == 0) return 0;

    // If the PHI translated LHS is an add of a constant, fold the immediates.
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
      if (BOp->getOpcode() == Instruction::Add)
        if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
          LHS = BOp->getOperand(0);
          RHS = ConstantExpr::getAdd(RHS, CI);
          isNSW = isNUW = false;

          // If the old 'LHS' was an input, add the new 'LHS' as an input.
          if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
            RemoveInstInputs(BOp, InstInputs);

    // See if the add simplifies away.
    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) {
      // If we simplified the operands, the LHS is no longer an input, but Res
      // is.
      RemoveInstInputs(LHS, InstInputs);
      return AddAsInput(Res);

    // If we didn't modify the add, just return it.
    if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
      return Inst;

    // Otherwise, see if we have this add available somewhere.
    for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
         UI != E; ++UI) {
      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
        if (BO->getOpcode() == Instruction::Add &&
            BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
            BO->getParent()->getParent() == CurBB->getParent() &&
            (!DT || DT->dominates(BO->getParent(), PredBB)))
          return BO;

    return 0;

  // Otherwise, we failed.
  return 0;
void MatcherGen::
EmitResultInstructionAsOperand(const TreePatternNode *N,
                               SmallVectorImpl<unsigned> &OutputOps) {
  Record *Op = N->getOperator();
  const CodeGenTarget &CGT = CGP.getTargetInfo();
  CodeGenInstruction &II = CGT.getInstruction(Op);
  const DAGInstruction &Inst = CGP.getInstruction(Op);

  // If we can, get the pattern for the instruction we're generating. We derive
  // a variety of information from this pattern, such as whether it has a chain.
  // FIXME2: This is extremely dubious for several reasons, not the least of
  // which it gives special status to instructions with patterns that Pat<>
  // nodes can't duplicate.
  const TreePatternNode *InstPatNode = GetInstPatternNode(Inst, N);

  // NodeHasChain - Whether the instruction node we're creating takes chains.
  bool NodeHasChain = InstPatNode &&
                      InstPatNode->TreeHasProperty(SDNPHasChain, CGP);

  // Instructions which load and store from memory should have a chain,
  // regardless of whether they happen to have an internal pattern saying so.
  if (Pattern.getSrcPattern()->TreeHasProperty(SDNPHasChain, CGP)
      && (II.hasCtrlDep || II.mayLoad || II.mayStore || II.canFoldAsLoad ||
      NodeHasChain = true;

  bool isRoot = N == Pattern.getDstPattern();

  // TreeHasOutGlue - True if this tree has glue.
  bool TreeHasInGlue = false, TreeHasOutGlue = false;
  if (isRoot) {
    const TreePatternNode *SrcPat = Pattern.getSrcPattern();
    TreeHasInGlue = SrcPat->TreeHasProperty(SDNPOptInGlue, CGP) ||
                    SrcPat->TreeHasProperty(SDNPInGlue, CGP);

    // FIXME2: this is checking the entire pattern, not just the node in
    // question, doing this just for the root seems like a total hack.
    TreeHasOutGlue = SrcPat->TreeHasProperty(SDNPOutGlue, CGP);

  // NumResults - This is the number of results produced by the instruction in
  // the "outs" list.
  unsigned NumResults = Inst.getNumResults();

  // Number of operands we know the output instruction must have. If it is
  // variadic, we could have more operands.
  unsigned NumFixedOperands = II.Operands.size();

  SmallVector<unsigned, 8> InstOps;

  // Loop over all of the fixed operands of the instruction pattern, emitting
  // code to fill them all in. The node 'N' usually has number children equal to
  // the number of input operands of the instruction.  However, in cases where
  // there are predicate operands for an instruction, we need to fill in the
  // 'execute always' values. Match up the node operands to the instruction
  // operands to do this.
  unsigned ChildNo = 0;
  for (unsigned InstOpNo = NumResults, e = NumFixedOperands;
       InstOpNo != e; ++InstOpNo) {
    // Determine what to emit for this operand.
    Record *OperandNode = II.Operands[InstOpNo].Rec;
    if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
        !CGP.getDefaultOperand(OperandNode).DefaultOps.empty()) {
      // This is a predicate or optional def operand; emit the
      // 'default ops' operands.
      const DAGDefaultOperand &DefaultOp
        = CGP.getDefaultOperand(OperandNode);
      for (unsigned i = 0, e = DefaultOp.DefaultOps.size(); i != e; ++i)
        EmitResultOperand(DefaultOp.DefaultOps[i], InstOps);

    // Otherwise this is a normal operand or a predicate operand without
    // 'execute always'; emit it.

    // For operands with multiple sub-operands we may need to emit
    // multiple child patterns to cover them all.  However, ComplexPattern
    // children may themselves emit multiple MI operands.
    unsigned NumSubOps = 1;
    if (OperandNode->isSubClassOf("Operand")) {
      DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo");
      if (unsigned NumArgs = MIOpInfo->getNumArgs())
        NumSubOps = NumArgs;

    unsigned FinalNumOps = InstOps.size() + NumSubOps;
    while (InstOps.size() < FinalNumOps) {
      const TreePatternNode *Child = N->getChild(ChildNo);
      unsigned BeforeAddingNumOps = InstOps.size();
      EmitResultOperand(Child, InstOps);
      assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands");

      // If the operand is an instruction and it produced multiple results, just
      // take the first one.
      if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction"))


  // If this is a variadic output instruction (i.e. REG_SEQUENCE), we can't
  // expand suboperands, use default operands, or other features determined from
  // the CodeGenInstruction after the fixed operands, which were handled
  // above. Emit the remaining instructions implicitly added by the use for
  // variable_ops.
  if (II.Operands.isVariadic) {
    for (unsigned I = ChildNo, E = N->getNumChildren(); I < E; ++I)
      EmitResultOperand(N->getChild(I), InstOps);

  // If this node has input glue or explicitly specified input physregs, we
  // need to add chained and glued copyfromreg nodes and materialize the glue
  // input.
  if (isRoot && !PhysRegInputs.empty()) {
    // Emit all of the CopyToReg nodes for the input physical registers.  These
    // occur in patterns like (mul:i8 AL:i8, GR8:i8:$src).
    for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i)
      AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second,
    // Even if the node has no other glue inputs, the resultant node must be
    // glued to the CopyFromReg nodes we just generated.
    TreeHasInGlue = true;

  // Result order: node results, chain, glue

  // Determine the result types.
  SmallVector<MVT::SimpleValueType, 4> ResultVTs;
  for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i)

  // If this is the root instruction of a pattern that has physical registers in
  // its result pattern, add output VTs for them.  For example, X86 has:
  //   (set AL, (mul ...))
  // This also handles implicit results like:
  //   (implicit EFLAGS)
  if (isRoot && !Pattern.getDstRegs().empty()) {
    // If the root came from an implicit def in the instruction handling stuff,
    // don't re-add it.
    Record *HandledReg = nullptr;
    if (II.HasOneImplicitDefWithKnownVT(CGT) != MVT::Other)
      HandledReg = II.ImplicitDefs[0];

    for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i) {
      Record *Reg = Pattern.getDstRegs()[i];
      if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue;
      ResultVTs.push_back(getRegisterValueType(Reg, CGT));

  // If this is the root of the pattern and the pattern we're matching includes
  // a node that is variadic, mark the generated node as variadic so that it
  // gets the excess operands from the input DAG.
  int NumFixedArityOperands = -1;
  if (isRoot &&
      Pattern.getSrcPattern()->NodeHasProperty(SDNPVariadic, CGP))
    NumFixedArityOperands = Pattern.getSrcPattern()->getNumChildren();

  // If this is the root node and multiple matched nodes in the input pattern
  // have MemRefs in them, have the interpreter collect them and plop them onto
  // this node. If there is just one node with MemRefs, leave them on that node
  // even if it is not the root.
  // FIXME3: This is actively incorrect for result patterns with multiple
  // memory-referencing instructions.
  bool PatternHasMemOperands =
    Pattern.getSrcPattern()->TreeHasProperty(SDNPMemOperand, CGP);

  bool NodeHasMemRefs = false;
  if (PatternHasMemOperands) {
    unsigned NumNodesThatLoadOrStore =
      numNodesThatMayLoadOrStore(Pattern.getDstPattern(), CGP);
    bool NodeIsUniqueLoadOrStore = mayInstNodeLoadOrStore(N, CGP) &&
                                   NumNodesThatLoadOrStore == 1;
    NodeHasMemRefs =
      NodeIsUniqueLoadOrStore || (isRoot && (mayInstNodeLoadOrStore(N, CGP) ||
                                             NumNodesThatLoadOrStore != 1));

  assert((!ResultVTs.empty() || TreeHasOutGlue || NodeHasChain) &&
         "Node has no result");

  AddMatcher(new EmitNodeMatcher(II.Namespace+"::"+II.TheDef->getName(),
                                 ResultVTs, InstOps,
                                 NodeHasChain, TreeHasInGlue, TreeHasOutGlue,
                                 NodeHasMemRefs, NumFixedArityOperands,

  // The non-chain and non-glue results of the newly emitted node get recorded.
  for (unsigned i = 0, e = ResultVTs.size(); i != e; ++i) {
    if (ResultVTs[i] == MVT::Other || ResultVTs[i] == MVT::Glue) break;
Example #20
/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
/// block.  All newly created instructions are added to the NewInsts list.
/// This returns null on failure.
Value *PHITransAddr::
InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
                           BasicBlock *PredBB, const DominatorTree &DT,
                           SmallVectorImpl<Instruction*> &NewInsts) {
  // See if we have a version of this value already available and dominating
  // PredBB.  If so, there is no need to insert a new instance of it.
  PHITransAddr Tmp(InVal, DL);
  if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
    return Tmp.getAddr();

  // If we don't have an available version of this value, it must be an
  // instruction.
  Instruction *Inst = cast<Instruction>(InVal);

  // Handle cast of PHI translatable value.
  if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
    if (!isSafeToSpeculativelyExecute(Cast)) return 0;
    Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
                                              CurBB, PredBB, DT, NewInsts);
    if (OpVal == 0) return 0;

    // Otherwise insert a cast at the end of PredBB.
    CastInst *New = CastInst::Create(Cast->getOpcode(),
                                     OpVal, InVal->getType(),
    return New;

  // Handle getelementptr with at least one PHI operand.
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
    SmallVector<Value*, 8> GEPOps;
    BasicBlock *CurBB = GEP->getParent();
    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
      Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
                                                CurBB, PredBB, DT, NewInsts);
      if (OpVal == 0) return 0;

    GetElementPtrInst *Result =
      GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
    return Result;

#if 0
  // FIXME: This code works, but it is unclear that we actually want to insert
  // a big chain of computation in order to make a value available in a block.
  // This needs to be evaluated carefully to consider its cost trade offs.

  // Handle add with a constant RHS.
  if (Inst->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(Inst->getOperand(1))) {
    // PHI translate the LHS.
    Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
                                              CurBB, PredBB, DT, NewInsts);
    if (OpVal == 0) return 0;

    BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
    return Res;

  return 0;
Example #21
void PlistDiagnostics::FlushDiagnosticsImpl(
                                    std::vector<const PathDiagnostic *> &Diags,
                                    FilesMade *filesMade) {
  // Build up a set of FIDs that we use by scanning the locations and
  // ranges of the diagnostics.
  FIDMap FM;
  SmallVector<FileID, 10> Fids;
  const SourceManager* SM = 0;

  if (!Diags.empty())
    SM = &(*(*Diags.begin())->path.begin())->getLocation().getManager();

  for (std::vector<const PathDiagnostic*>::iterator DI = Diags.begin(),
       DE = Diags.end(); DI != DE; ++DI) {

    const PathDiagnostic *D = *DI;

    llvm::SmallVector<const PathPieces *, 5> WorkList;

    while (!WorkList.empty()) {
      const PathPieces &path = *WorkList.back();
      for (PathPieces::const_iterator I = path.begin(), E = path.end();
           I!=E; ++I) {
        const PathDiagnosticPiece *piece = I->getPtr();
        AddFID(FM, Fids, SM, piece->getLocation().asLocation());
        ArrayRef<SourceRange> Ranges = piece->getRanges();
        for (ArrayRef<SourceRange>::iterator I = Ranges.begin(),
                                             E = Ranges.end(); I != E; ++I) {
          AddFID(FM, Fids, SM, I->getBegin());
          AddFID(FM, Fids, SM, I->getEnd());

        if (const PathDiagnosticCallPiece *call =
            dyn_cast<PathDiagnosticCallPiece>(piece)) {
            callEnterWithin = call->getCallEnterWithinCallerEvent();
          if (callEnterWithin)
            AddFID(FM, Fids, SM, callEnterWithin->getLocation().asLocation());

        else if (const PathDiagnosticMacroPiece *macro =
                 dyn_cast<PathDiagnosticMacroPiece>(piece)) {

  // Open the file.
  std::string ErrMsg;
  llvm::raw_fd_ostream o(OutputFile.c_str(), ErrMsg);
  if (!ErrMsg.empty()) {
    llvm::errs() << "warning: could not create file: " << OutputFile << '\n';

  // Write the plist header.
  o << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
  "<!DOCTYPE plist PUBLIC \"-//Apple Computer//DTD PLIST 1.0//EN\" "
  "<plist version=\"1.0\">\n";

  // Write the root object: a <dict> containing...
  //  - "clang_version", the string representation of clang version
  //  - "files", an <array> mapping from FIDs to file names
  //  - "diagnostics", an <array> containing the path diagnostics
  o << "<dict>\n" <<
       " <key>clang_version</key>\n";
  EmitString(o, getClangFullVersion()) << '\n';
  o << " <key>files</key>\n"
       " <array>\n";

  for (SmallVectorImpl<FileID>::iterator I=Fids.begin(), E=Fids.end();
       I!=E; ++I) {
    o << "  ";
    EmitString(o, SM->getFileEntryForID(*I)->getName()) << '\n';

  o << " </array>\n"
       " <key>diagnostics</key>\n"
       " <array>\n";

  for (std::vector<const PathDiagnostic*>::iterator DI=Diags.begin(),
       DE = Diags.end(); DI!=DE; ++DI) {

    o << "  <dict>\n"
         "   <key>path</key>\n";

    const PathDiagnostic *D = *DI;

    o << "   <array>\n";

    for (PathPieces::const_iterator I = D->path.begin(), E = D->path.end(); 
         I != E; ++I)
      ReportDiag(o, **I, FM, *SM, LangOpts);

    o << "   </array>\n";

    // Output the bug type and bug category.
    o << "   <key>description</key>";
    EmitString(o, D->getShortDescription()) << '\n';
    o << "   <key>category</key>";
    EmitString(o, D->getCategory()) << '\n';
    o << "   <key>type</key>";
    EmitString(o, D->getBugType()) << '\n';
    // Output information about the semantic context where
    // the issue occurred.
    if (const Decl *DeclWithIssue = D->getDeclWithIssue()) {
      // FIXME: handle blocks, which have no name.
      if (const NamedDecl *ND = dyn_cast<NamedDecl>(DeclWithIssue)) {
        StringRef declKind;
        switch (ND->getKind()) {
          case Decl::CXXRecord:
            declKind = "C++ class";
          case Decl::CXXMethod:
            declKind = "C++ method";
          case Decl::ObjCMethod:
            declKind = "Objective-C method";
          case Decl::Function:
            declKind = "function";
        if (!declKind.empty()) {
          const std::string &declName = ND->getDeclName().getAsString();
          o << "  <key>issue_context_kind</key>";
          EmitString(o, declKind) << '\n';
          o << "  <key>issue_context</key>";
          EmitString(o, declName) << '\n';

        // Output the bug hash for issue unique-ing. Currently, it's just an
        // offset from the beginning of the function.
        if (const Stmt *Body = DeclWithIssue->getBody()) {
          FullSourceLoc Loc(SM->getExpansionLoc(D->getLocation().asLocation()),
          FullSourceLoc FunLoc(SM->getExpansionLoc(Body->getLocStart()), *SM);
          o << "  <key>issue_hash</key><integer>"
              << Loc.getExpansionLineNumber() - FunLoc.getExpansionLineNumber()
              << "</integer>\n";

    // Output the location of the bug.
    o << "  <key>location</key>\n";
    EmitLocation(o, *SM, LangOpts, D->getLocation(), FM, 2);

    // Output the diagnostic to the sub-diagnostic client, if any.
    if (!filesMade->empty()) {
      StringRef lastName;
      PDFileEntry::ConsumerFiles *files = filesMade->getFiles(*D);
      if (files) {
        for (PDFileEntry::ConsumerFiles::const_iterator CI = files->begin(),
                CE = files->end(); CI != CE; ++CI) {
          StringRef newName = CI->first;
          if (newName != lastName) {
            if (!lastName.empty()) {
              o << "  </array>\n";
            lastName = newName;
            o <<  "  <key>" << lastName << "_files</key>\n";
            o << "  <array>\n";
          o << "   <string>" << CI->second << "</string>\n";
        o << "  </array>\n";

    // Close up the entry.
    o << "  </dict>\n";

  o << " </array>\n";

  // Finish.
  o << "</dict>\n</plist>";  
Example #22
Parser::parseGenericParametersBeforeWhere(SourceLoc LAngleLoc,
                        SmallVectorImpl<GenericTypeParamDecl *> &GenericParams) {
  ParserStatus Result;
  SyntaxParsingContext GPSContext(SyntaxContext, SyntaxKind::GenericParameterList);
  bool HasNextParam;
  do {
    SyntaxParsingContext GParamContext(SyntaxContext, SyntaxKind::GenericParameter);
    // Note that we're parsing a declaration.
    StructureMarkerRAII ParsingDecl(*this, Tok.getLoc(),
    if (ParsingDecl.isFailed()) {
      return makeParserError();

    // Parse attributes.
    DeclAttributes attributes;
    if (Tok.hasComment())
      attributes.add(new (Context) RawDocCommentAttr(Tok.getCommentRange()));
    bool foundCCTokenInAttr;
    parseDeclAttributeList(attributes, foundCCTokenInAttr);

    // Parse the name of the parameter.
    Identifier Name;
    SourceLoc NameLoc;
    if (parseIdentifier(Name, NameLoc,
                        diag::expected_generics_parameter_name)) {

    // Parse the ':' followed by a type.
    SmallVector<TypeLoc, 1> Inherited;
    if (Tok.is(tok::colon)) {
      ParserResult<TypeRepr> Ty;

      if (Tok.isAny(tok::identifier, tok::code_complete, tok::kw_protocol,
                    tok::kw_Any)) {
        Ty = parseType();
      } else if (Tok.is(tok::kw_class)) {
        diagnose(Tok, diag::unexpected_class_constraint);
        diagnose(Tok, diag::suggest_anyobject)
        .fixItReplace(Tok.getLoc(), "AnyObject");
      } else {
        diagnose(Tok, diag::expected_generics_type_restriction, Name);

      if (Ty.hasCodeCompletion())
        return makeParserCodeCompletionStatus();

      if (Ty.isNonNull())

    // We always create generic type parameters with an invalid depth.
    // Semantic analysis fills in the depth when it processes the generic
    // parameter list.
    auto Param = new (Context) GenericTypeParamDecl(CurDeclContext, Name, NameLoc,
    if (!Inherited.empty())

    // Attach attributes.
    Param->getAttrs() = attributes;

    // Add this parameter to the scope.

    // Parse the comma, if the list continues.
    HasNextParam = consumeIf(tok::comma);
  } while (HasNextParam);

  return Result;
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);

      // Remove the alloca from the Allocas list, since it has been processed
    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          if (!DIB)
            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
        // Remove the (now dead) store and alloca.

        if (AST) AST->deleteValue(AI);
        // The alloca has been processed, move on.
    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);
      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {
        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
            if (!DIB)
              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
        if (AST) AST->deleteValue(AI);
        // The alloca has been processed, move on.
        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)


    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode*, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());
  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
    if (AST) AST->deleteValue(A);

  // Remove alloca's dbg.declare instrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;
    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
        EliminatedAPHI = true;
  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)

    // Only do work here if there the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());
    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
/// Create AST statements which convert from an enum to an Int with a switch.
/// \p stmts The generated statements are appended to this vector.
/// \p parentDC Either an extension or the enum itself.
/// \p enumDecl The enum declaration.
/// \p enumVarDecl The enum input variable.
/// \p funcDecl The parent function.
/// \p indexName The name of the output variable.
/// \return A DeclRefExpr of the output variable (of type Int).
static DeclRefExpr *convertEnumToIndex(SmallVectorImpl<ASTNode> &stmts,
                                       DeclContext *parentDC,
                                       EnumDecl *enumDecl,
                                       VarDecl *enumVarDecl,
                                       AbstractFunctionDecl *funcDecl,
                                       const char *indexName) {
  ASTContext &C = enumDecl->getASTContext();
  Type enumType = enumVarDecl->getType();
  Type intType = C.getIntDecl()->getDeclaredType();

  auto indexVar = new (C) VarDecl(/*static*/false, /*let*/false,
                                  SourceLoc(), C.getIdentifier(indexName),
                                  intType, funcDecl);
  // generate: var indexVar
  Pattern *indexPat = new (C) NamedPattern(indexVar, /*implicit*/ true);
  indexPat = new (C) TypedPattern(indexPat, TypeLoc::withoutLoc(intType));
  auto indexBind = PatternBindingDecl::create(C, SourceLoc(),
                                              indexPat, nullptr, funcDecl);

  unsigned index = 0;
  SmallVector<CaseStmt*, 4> cases;
  for (auto elt : enumDecl->getAllElements()) {
    // generate: case .<Case>:
    auto pat = new (C) EnumElementPattern(TypeLoc::withoutLoc(enumType),
                                          SourceLoc(), SourceLoc(),
                                          Identifier(), elt, nullptr);
    auto labelItem = CaseLabelItem(/*IsDefault=*/false, pat, SourceLoc(),
    // generate: indexVar = <index>
    llvm::SmallString<8> indexVal;
    APInt(32, index++).toString(indexVal, 10, /*signed*/ false);
    auto indexStr = C.AllocateCopy(indexVal);
    auto indexExpr = new (C) IntegerLiteralExpr(StringRef(indexStr.data(),
                                                indexStr.size()), SourceLoc(),
                                                /*implicit*/ true);
    auto indexRef = new (C) DeclRefExpr(indexVar, DeclNameLoc(),
    auto assignExpr = new (C) AssignExpr(indexRef, SourceLoc(),
                                         indexExpr, /*implicit*/ true);
    auto body = BraceStmt::create(C, SourceLoc(), ASTNode(assignExpr),
    cases.push_back(CaseStmt::create(C, SourceLoc(), labelItem,
                                     SourceLoc(), body));
  // generate: switch enumVar { }
  auto enumRef = new (C) DeclRefExpr(enumVarDecl, DeclNameLoc(),
  auto switchStmt = SwitchStmt::create(LabeledStmtInfo(), SourceLoc(), enumRef,
                                       SourceLoc(), cases, SourceLoc(), C);

  return new (C) DeclRefExpr(indexVar, DeclNameLoc(), /*implicit*/ true,
                             AccessSemantics::Ordinary, intType);
Example #25
std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
  SmallVector<const char *, 16> Values;
  AddAllArgValues(Values, Id);
  return std::vector<std::string>(Values.begin(), Values.end());
Example #26
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that're reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)

    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = TI->getOperand(i);
      if (!MO.isReg())
      unsigned Reg = MO.getReg();
      if (!Reg)
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&

    unsigned Def = Candidates[i].Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
Example #27
void SILGenFunction::emitClassConstructorAllocator(ConstructorDecl *ctor) {
  assert(!ctor->isFactoryInit() && "factories should not be emitted here");

  // Emit the prolog. Since we're just going to forward our args directly
  // to the initializer, don't allocate local variables for them.
  RegularLocation Loc(ctor);

  // Forward the constructor arguments.
  // FIXME: Handle 'self' along with the other body patterns.
  SmallVector<SILValue, 8> args;
  bindParametersForForwarding(ctor->getParameters(), args);

  SILValue selfMetaValue = emitConstructorMetatypeArg(*this, ctor);

  // Allocate the "self" value.
  VarDecl *selfDecl = ctor->getImplicitSelfDecl();
  SILType selfTy = getLoweredType(selfDecl->getType());
  assert(selfTy.hasReferenceSemantics() &&
         "can't emit a value type ctor here");

  // Use alloc_ref to allocate the object.
  // TODO: allow custom allocation?
  // FIXME: should have a cleanup in case of exception
  auto selfClassDecl = ctor->getDeclContext()->getSelfClassDecl();

  SILValue selfValue;

  // Allocate the 'self' value.
  bool useObjCAllocation = usesObjCAllocator(selfClassDecl);

  if (ctor->hasClangNode() || ctor->isConvenienceInit()) {
    assert(ctor->hasClangNode() || ctor->isObjC());
    // For an allocator thunk synthesized for an @objc convenience initializer
    // or imported Objective-C init method, allocate using the metatype.
    SILValue allocArg = selfMetaValue;

    // When using Objective-C allocation, convert the metatype
    // argument to an Objective-C metatype.
    if (useObjCAllocation) {
      auto metaTy = allocArg->getType().castTo<MetatypeType>();
      metaTy = CanMetatypeType::get(metaTy.getInstanceType(),
      allocArg = B.createThickToObjCMetatype(Loc, allocArg,

    selfValue = B.createAllocRefDynamic(Loc, allocArg, selfTy,
                                        useObjCAllocation, {}, {});
  } else {
    // For a designated initializer, we know that the static type being
    // allocated is the type of the class that defines the designated
    // initializer.
    selfValue = B.createAllocRef(Loc, selfTy, useObjCAllocation, false,
                                 ArrayRef<SILType>(), ArrayRef<SILValue>());

  // Call the initializer. Always use the Swift entry point, which will be a
  // bridging thunk if we're calling ObjC.
  auto initConstant = SILDeclRef(ctor, SILDeclRef::Kind::Initializer);

  ManagedValue initVal;
  SILType initTy;

  // Call the initializer.
  SubstitutionMap subMap;
  if (auto *genericEnv = ctor->getGenericEnvironmentOfContext()) {
    auto *genericSig = genericEnv->getGenericSignature();
    subMap = SubstitutionMap::get(
      [&](SubstitutableType *t) -> Type {
        return genericEnv->mapTypeIntoContext(

  std::tie(initVal, initTy)
    = emitSiblingMethodRef(Loc, selfValue, initConstant, subMap);

  SILValue initedSelfValue = emitApplyWithRethrow(Loc, initVal.forward(*this),
                                                  initTy, subMap, args);


  // Return the initialized 'self'.
Example #28
/// Walk the specified loop in the CFG (defined by all blocks dominated by the
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)

  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  while (!WorkList.empty()) {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))

    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();

    // Don't hoist things out of a large switch statement.  This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    // Add children in reverse order as then the next popped worklist node is
    // the first child of this node.  This means we ultimately traverse the
    // DOM tree in exactly the same order as if we'd recursed.
    for (int i = (int)NumChildren-1; i >= 0; --i) {
      MachineDomTreeNode *Child = Children[i];
      ParentMap[Child] = Node;

  if (Scopes.size() == 0)

  // Compute registers which are livein into the loop headers.

  // Now perform LICM.
  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
    MachineDomTreeNode *Node = Scopes[i];
    MachineBasicBlock *MBB = Node->getBlock();


    // Process the block
    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
      MachineInstr *MI = &*MII;
      if (!Hoist(MI, Preheader))
      MII = NextMII;

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
/// EmitExceptionTable - Emit landing pads and actions.
/// The general organization of the table is complex, but the basic concepts are
/// easy.  First there is a header which describes the location and organization
/// of the three components that follow.
///  1. The landing pad site information describes the range of code covered by
///     the try.  In our case it's an accumulation of the ranges covered by the
///     invokes in the try.  There is also a reference to the landing pad that
///     handles the exception once processed.  Finally an index into the actions
///     table.
///  2. The action table, in our case, is composed of pairs of type IDs and next
///     action offset.  Starting with the action index from the landing pad
///     site, each type ID is checked for a match to the current exception.  If
///     it matches then the exception and type id are passed on to the landing
///     pad.  Otherwise the next action is looked up.  This chain is terminated
///     with a next action of zero.  If no type id is found then the frame is
///     unwound and handling continues.
///  3. Type ID table contains references to all the C++ typeinfo for all
///     catches in the function.  This tables is reverse indexed base 1.
void DwarfException::EmitExceptionTable() {
  const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();

  // Sort the landing pads in order of their type ids.  This is used to fold
  // duplicate actions.
  SmallVector<const LandingPadInfo *, 64> LandingPads;

  for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)

  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);

  // Compute the actions table and gather the first action index for each
  // landing pad site.
  SmallVector<ActionEntry, 32> Actions;
  SmallVector<unsigned, 64> FirstActions;
  unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);

  // Invokes and nounwind calls have entries in PadMap (due to being bracketed
  // by try-range labels when lowered).  Ordinary calls do not, so appropriate
  // try-ranges for them need be deduced when using DWARF exception handling.
  RangeMapType PadMap;
  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
    const LandingPadInfo *LandingPad = LandingPads[i];
    for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
      assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
      PadRange P = { i, j };
      PadMap[BeginLabel] = P;

  // Compute the call-site table.
  SmallVector<CallSiteEntry, 64> CallSites;
  ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);

  // Final tallies.

  // Call sites.
  bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
  bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;

  unsigned CallSiteTableLength;
  if (IsSJLJ)
    CallSiteTableLength = 0;
  else {
    unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4
    unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
    unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
    CallSiteTableLength =
      CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);

  for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
    CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
    if (IsSJLJ)
      CallSiteTableLength += MCAsmInfo::getULEB128Size(i);

  // Type infos.
  const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
  unsigned TTypeEncoding;
  unsigned TypeFormatSize;

  if (!HaveTTData) {
    // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
    // that we're omitting that bit.
    TTypeEncoding = dwarf::DW_EH_PE_omit;
    // dwarf::DW_EH_PE_absptr
    TypeFormatSize = Asm->getTargetData().getPointerSize();
  } else {
    // Okay, we have actual filters or typeinfos to emit.  As such, we need to
    // pick a type encoding for them.  We're about to emit a list of pointers to
    // typeinfo objects at the end of the LSDA.  However, unless we're in static
    // mode, this reference will require a relocation by the dynamic linker.
    // Because of this, we have a couple of options:
    //   1) If we are in -static mode, we can always use an absolute reference
    //      from the LSDA, because the static linker will resolve it.
    //   2) Otherwise, if the LSDA section is writable, we can output the direct
    //      reference to the typeinfo and allow the dynamic linker to relocate
    //      it.  Since it is in a writable section, the dynamic linker won't
    //      have a problem.
    //   3) Finally, if we're in PIC mode and the LDSA section isn't writable,
    //      we need to use some form of indirection.  For example, on Darwin,
    //      we can output a statically-relocatable reference to a dyld stub. The
    //      offset to the stub is constant, but the contents are in a section
    //      that is updated by the dynamic linker.  This is easy enough, but we
    //      need to tell the personality function of the unwinder to indirect
    //      through the dyld stub.
    // FIXME: When (3) is actually implemented, we'll have to emit the stubs
    // somewhere.  This predicate should be moved to a shared location that is
    // in target-independent code.
    TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
    TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);

  // Begin the exception table.
  // Sometimes we want not to emit the data into separate section (e.g. ARM
  // EHABI). In this case LSDASection will be NULL.
  if (LSDASection)

  // Emit the LSDA.
  MCSymbol *GCCETSym =

  if (IsSJLJ)

  // Emit the LSDA header.
  Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
  Asm->EmitEncodingByte(TTypeEncoding, "@TType");

  // The type infos need to be aligned. GCC does this by inserting padding just
  // before the type infos. However, this changes the size of the exception
  // table, so you need to take this into account when you output the exception
  // table size. However, the size is output using a variable length encoding.
  // So by increasing the size by inserting padding, you may increase the number
  // of bytes used for writing the size. If it increases, say by one byte, then
  // you now need to output one less byte of padding to get the type infos
  // aligned. However this decreases the size of the exception table. This
  // changes the value you have to output for the exception table size. Due to
  // the variable length encoding, the number of bytes used for writing the
  // length may decrease. If so, you then have to increase the amount of
  // padding. And so on. If you look carefully at the GCC code you will see that
  // it indeed does this in a loop, going on and on until the values stabilize.
  // We chose another solution: don't output padding inside the table like GCC
  // does, instead output it before the table.
  unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
  unsigned CallSiteTableLengthSize =
  unsigned TTypeBaseOffset =
    sizeof(int8_t) +                            // Call site format
    CallSiteTableLengthSize +                   // Call site table length size
    CallSiteTableLength +                       // Call site table length
    SizeActions +                               // Actions size
  unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
  unsigned TotalSize =
    sizeof(int8_t) +                            // LPStart format
    sizeof(int8_t) +                            // TType format
    (HaveTTData ? TTypeBaseOffsetSize : 0) +    // TType base offset size
    TTypeBaseOffset;                            // TType base offset
  unsigned SizeAlign = (4 - TotalSize) & 3;

  if (HaveTTData) {
    // Account for any extra padding that will be added to the call site table
    // length.
    Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
    SizeAlign = 0;

  bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();

  // SjLj Exception handling
  if (IsSJLJ) {
    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");

    // Add extra padding if it wasn't added to the TType base offset.
    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);

    // Emit the landing pad site information.
    unsigned idx = 0;
    for (SmallVectorImpl<CallSiteEntry>::const_iterator
         I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
      const CallSiteEntry &S = *I;

      // Offset of the landing pad, counted in 16-byte bundles relative to the
      // @LPStart address.
      if (VerboseAsm) {
        Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
        Asm->OutStreamer.AddComment("  On exception at call site "+Twine(idx));

      // Offset of the first associated action record, relative to the start of
      // the action table. This value is biased by 1 (1 indicates the start of
      // the action table), and 0 indicates that there are no actions.
      if (VerboseAsm) {
        if (S.Action == 0)
          Asm->OutStreamer.AddComment("  Action: cleanup");
          Asm->OutStreamer.AddComment("  Action: " +
                                      Twine((S.Action - 1) / 2 + 1));
  } else {
    // DWARF Exception handling

    // The call-site table is a list of all call sites that may throw an
    // exception (including C++ 'throw' statements) in the procedure
    // fragment. It immediately follows the LSDA header. Each entry indicates,
    // for a given call, the first corresponding action record and corresponding
    // landing pad.
    // The table begins with the number of bytes, stored as an LEB128
    // compressed, unsigned integer. The records immediately follow the record
    // count. They are sorted in increasing call-site address. Each record
    // indicates:
    //   * The position of the call-site.
    //   * The position of the landing pad.
    //   * The first action record for that call site.
    // A missing entry in the call-site table indicates that a call is not
    // supposed to throw.

    // Emit the landing pad call site table.
    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");

    // Add extra padding if it wasn't added to the TType base offset.
    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);

    unsigned Entry = 0;
    for (SmallVectorImpl<CallSiteEntry>::const_iterator
         I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
      const CallSiteEntry &S = *I;

      MCSymbol *EHFuncBeginSym =
        Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());

      MCSymbol *BeginLabel = S.BeginLabel;
      if (BeginLabel == 0)
        BeginLabel = EHFuncBeginSym;
      MCSymbol *EndLabel = S.EndLabel;
      if (EndLabel == 0)
        EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());

      // Offset of the call site relative to the previous call site, counted in
      // number of 16-byte bundles. The first call site is counted relative to
      // the start of the procedure fragment.
      if (VerboseAsm)
        Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
      Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
      if (VerboseAsm)
        Asm->OutStreamer.AddComment(Twine("  Call between ") +
                                    BeginLabel->getName() + " and " +
      Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);

      // Offset of the landing pad, counted in 16-byte bundles relative to the
      // @LPStart address.
      if (!S.PadLabel) {
        if (VerboseAsm)
          Asm->OutStreamer.AddComment("    has no landing pad");
        Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
      } else {
        if (VerboseAsm)
          Asm->OutStreamer.AddComment(Twine("    jumps to ") +
        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);

      // Offset of the first associated action record, relative to the start of
      // the action table. This value is biased by 1 (1 indicates the start of
      // the action table), and 0 indicates that there are no actions.
      if (VerboseAsm) {
        if (S.Action == 0)
          Asm->OutStreamer.AddComment("  On action: cleanup");
          Asm->OutStreamer.AddComment("  On action: " +
                                      Twine((S.Action - 1) / 2 + 1));

  // Emit the Action Table.
  int Entry = 0;
  for (SmallVectorImpl<ActionEntry>::const_iterator
         I = Actions.begin(), E = Actions.end(); I != E; ++I) {
    const ActionEntry &Action = *I;

    if (VerboseAsm) {
      // Emit comments that decode the action table.
      Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");

    // Type Filter
    //   Used by the runtime to match the type of the thrown exception to the
    //   type of the catch clauses or the types in the exception specification.
    if (VerboseAsm) {
      if (Action.ValueForTypeID > 0)
        Asm->OutStreamer.AddComment("  Catch TypeInfo " +
      else if (Action.ValueForTypeID < 0)
        Asm->OutStreamer.AddComment("  Filter TypeInfo " +
        Asm->OutStreamer.AddComment("  Cleanup");

    // Action Record
    //   Self-relative signed displacement in bytes of the next action record,
    //   or 0 if there is no next action record.
    if (VerboseAsm) {
      if (Action.NextAction == 0) {
        Asm->OutStreamer.AddComment("  No further actions");
      } else {
        unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
        Asm->OutStreamer.AddComment("  Continue to action "+Twine(NextAction));

  // Emit the Catch TypeInfos.
  if (VerboseAsm && !TypeInfos.empty()) {
    Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
    Entry = TypeInfos.size();

  for (std::vector<const GlobalVariable *>::const_reverse_iterator
         I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
    const GlobalVariable *GV = *I;
    if (VerboseAsm)
      Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
    if (GV)
      Asm->EmitReference(GV, TTypeEncoding);

  // Emit the Exception Specifications.
  if (VerboseAsm && !FilterIds.empty()) {
    Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
    Entry = 0;
  for (std::vector<unsigned>::const_iterator
         I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
    unsigned TypeID = *I;
    if (VerboseAsm) {
      if (TypeID != 0)
        Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));


Example #30
/// LowerCCCCallTo - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: sret.
MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     bool isTailCall,
                                     const SmallVectorImpl<ISD::OutputArg>
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes,
                                                      getPointerTy(), true));

  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
  SmallVector<SDValue, 12> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    SDValue Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);

    // Arguments that can be passed on register must be kept at RegsToPass
    // vector
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {

      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());

      SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),

      SDValue MemOp;
      ISD::ArgFlagsTy Flags = Outs[i].Flags;

      if (Flags.isByVal()) {
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i16);
        MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
      } else {
        MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(),
                             false, false, 0);


  // Transform all store nodes into one single node because all store nodes are
  // independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain and
  // flag operands which copy the outgoing args into registers.  The InFlag in
  // necessary since all emitted instructions must be stuck together.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)

  if (InFlag.getNode())

  Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, getPointerTy(), true),
                             DAG.getConstant(0, getPointerTy(), true),
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
                         DAG, InVals);