Example #1
/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
    SmallSetVector<Value *, 8> FlowsToReturn;
    for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
        if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
            FlowsToReturn.insert(Ret->getReturnValue());

    for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
        Value *RetVal = FlowsToReturn[i];

        if (Constant *C = dyn_cast<Constant>(RetVal)) {
            if (!C->isNullValue() && !isa<UndefValue>(C))
                return false;

            continue;
        }

        if (isa<Argument>(RetVal))
            return false;

        if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
            switch (RVI->getOpcode()) {
            // Extend the analysis by looking upwards.
            case Instruction::BitCast:
            case Instruction::GetElementPtr:
            case Instruction::AddrSpaceCast:
                FlowsToReturn.insert(RVI->getOperand(0));
                continue;
            case Instruction::Select: {
                SelectInst *SI = cast<SelectInst>(RVI);
                FlowsToReturn.insert(SI->getTrueValue());
                FlowsToReturn.insert(SI->getFalseValue());
                continue;
            }
            case Instruction::PHI: {
                PHINode *PN = cast<PHINode>(RVI);
                for (Value *IncValue : PN->incoming_values())
                    FlowsToReturn.insert(IncValue);
                continue;
            }

            // Check whether the pointer came from an allocation.
            case Instruction::Alloca:
                break;
            case Instruction::Call:
            case Instruction::Invoke: {
                CallSite CS(RVI);
                if (CS.paramHasAttr(0, Attribute::NoAlias))
                    break;
                if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
                    break;
            } // fall-through
            default:
                return false; // Did not come from an allocation.
            }

        if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
            return false;
    }

    return true;
}
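
The SmallSetVector here doubles as a deduplicating worklist: insert() silently drops values that are already queued, and the index-based loop stays valid because the container only ever appends. A minimal sketch of the same pattern, with a hypothetical Neighbors callback standing in for the operand walk:

#include "llvm/ADT/SmallSetVector.h"

// Visit everything reachable from Root exactly once, in discovery order.
// Mirrors the FlowsToReturn loop above: size() may grow while we iterate,
// and duplicate insertions are no-ops.
template <typename NeighborsFn>
llvm::SmallSetVector<int, 8> reachableFrom(int Root, NeighborsFn Neighbors) {
  llvm::SmallSetVector<int, 8> Worklist;
  Worklist.insert(Root);
  for (unsigned i = 0; i != Worklist.size(); ++i)
    for (int Next : Neighbors(Worklist[i]))
      Worklist.insert(Next); // ignored if Next was already queued
  return Worklist;
}
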
Example #2
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];      // (1)
//                                A[i] = b;      // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
                                 bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup *Group = nullptr;
    if (isStrided(DesB.Stride) && 
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //                      A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive with
      // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
      // here; canVectorizeMemory() should have returned false, except in the
      // case where we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory objects of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same predicate,
      // and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();  
      BasicBlock *BlockB = B->getParent();  
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Remove interleaved store groups with gaps.
  for (InterleaveGroup *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }
  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // For now we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. Once we change to Assume=true, we'll
  // only need at most one runtime check per interleaved group.
  for (InterleaveGroup *Group : LoadGroups) {
    // Case 1: A full group. We can skip the checks; for full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If the first and last members of the group don't wrap, then all
    // the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist)
    // and group member Factor - 1; if the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}
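
StoreGroups and LoadGroups lean on two SmallSetVector guarantees: a group reached through several members is recorded only once, and the invalidation loops above walk the groups in deterministic insertion order. A hedged sketch of that cleanup shape, with a toy Group type standing in for InterleaveGroup:

#include "llvm/ADT/SmallSetVector.h"
#include "llvm/ADT/SmallVector.h"

struct Group { unsigned NumMembers = 0, Factor = 0; };

// Drop candidate groups with gaps, as the store-group cleanup above does.
// Victims are gathered first because SetVector::remove erases from the
// vector side and would invalidate iterators mid-loop.
void pruneGroupsWithGaps(llvm::SmallSetVector<Group *, 4> &Groups) {
  llvm::SmallVector<Group *, 4> ToRelease;
  for (Group *G : Groups)
    if (G->NumMembers != G->Factor)
      ToRelease.push_back(G);
  for (Group *G : ToRelease)
    Groups.remove(G); // the surviving groups keep their relative order
}
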
Example #3
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
/// blocks, the successors have gained new predecessors. Update the PHI
/// instructions in them accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                        SmallSetVector<MachineBasicBlock*,8> &Succs) {
    for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
            SE = Succs.end(); SI != SE; ++SI) {
        MachineBasicBlock *SuccBB = *SI;
        for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
                II != EE; ++II) {
            if (!II->isPHI())
                break;
            unsigned Idx = 0;
            for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
                MachineOperand &MO = II->getOperand(i+1);
                if (MO.getMBB() == FromBB) {
                    Idx = i;
                    break;
                }
            }

            assert(Idx != 0);
            MachineOperand &MO0 = II->getOperand(Idx);
            unsigned Reg = MO0.getReg();
            if (isDead) {
                // Folded into the previous BB.
                // There could be duplicate phi source entries. FIXME: Should
                // sdisel or an earlier pass have fixed this?
                for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
                    MachineOperand &MO = II->getOperand(i+1);
                    if (MO.getMBB() == FromBB) {
                        II->RemoveOperand(i+1);
                        II->RemoveOperand(i);
                    }
                }
            } else
                Idx = 0;

            // If Idx is set, the operands at Idx and Idx+1 must be removed.
            // We reuse the location to avoid expensive RemoveOperand calls.

            DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
            if (LI != SSAUpdateVals.end()) {
                // This register is defined in the tail block.
                for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
                    MachineBasicBlock *SrcBB = LI->second[j].first;
                    unsigned SrcReg = LI->second[j].second;
                    if (Idx != 0) {
                        II->getOperand(Idx).setReg(SrcReg);
                        II->getOperand(Idx+1).setMBB(SrcBB);
                        Idx = 0;
                    } else {
                        II->addOperand(MachineOperand::CreateReg(SrcReg, false));
                        II->addOperand(MachineOperand::CreateMBB(SrcBB));
                    }
                }
            } else {
                // Live in tail block, must also be live in predecessors.
                for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
                    MachineBasicBlock *SrcBB = TDBBs[j];
                    if (Idx != 0) {
                        II->getOperand(Idx).setReg(Reg);
                        II->getOperand(Idx+1).setMBB(SrcBB);
                        Idx = 0;
                    } else {
                        II->addOperand(MachineOperand::CreateReg(Reg, false));
                        II->addOperand(MachineOperand::CreateMBB(SrcBB));
                    }
                }
            }
            if (Idx != 0) {
                II->RemoveOperand(Idx+1);
                II->RemoveOperand(Idx);
            }
        }
    }
}
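
Succs is a SmallSetVector because duplicated predecessor blocks can share successors: the set half collapses repeats while the vector half fixes a stable order for the PHI updates above. The collection step, sketched with a toy block type in place of MachineBasicBlock:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSetVector.h"
#include "llvm/ADT/SmallVector.h"

struct Block { llvm::SmallVector<Block *, 2> Successors; };

// Gather the distinct successors of Blocks, in first-seen order.
llvm::SmallSetVector<Block *, 8>
uniqueSuccessors(llvm::ArrayRef<Block *> Blocks) {
  llvm::SmallSetVector<Block *, 8> Succs;
  for (Block *B : Blocks)
    for (Block *S : B->Successors)
      Succs.insert(S); // duplicates across predecessors collapse here
  return Succs;
}
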
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;  // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;    // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;         // Def -> copies map
  SourceMap SrcMap; // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        if (!MRI->isReserved(Def) &&
            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            isNopCopy(CopyMI, Def, Src, TRI)) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden; eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP

          // Clear any kills of Def between CopyMI and MI. This extends the
          // live range.
          for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
            I->clearRegisterKills(Def, TRI);

          removeCopy(MI);
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, that copy cannot be eliminated.
      for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
        CI = CopyMap.find(*AI);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' was previously the source of another copy, then that earlier
      // copy's source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      // ... Make sure to clear the def maps of aliases first.
      for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
        CopyMap.erase(*AI);
        AvailCopyMap.erase(*AI);
      }
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember the source that's copied to Def. Once it's clobbered, it's
      // no longer available for copy propagation.
      if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
          SrcMap[Src].end()) {
        SrcMap[Src].push_back(Def);
      }

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    int RegMaskOpNum = -1;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isRegMask())
        RegMaskOpNum = i;
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
        DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    // The instruction has a register mask operand which means that it clobbers
    // a large set of registers.  It is possible to use the register mask to
    // prune the available copies, but treat it like a basic block boundary for
    // now.
    if (RegMaskOpNum >= 0) {
      // Erase any MaybeDeadCopies whose destination register is clobbered.
      const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
      for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
           DI != DE; ++DI) {
        unsigned Reg = (*DI)->getOperand(0).getReg();
        if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
          continue;
        removeCopy(*DI);
        Changed = true;
        ++NumDeletes;
      }

      // Clear all data structures as if we were beginning a new basic block.
      MaybeDeadCopies.clear();
      AvailCopyMap.clear();
      CopyMap.clear();
      SrcMap.clear();
      continue;
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
        CopyMap.erase(*AI);
        AvailCopyMap.erase(*AI);
      }

      // If 'Reg' is previously source of a copy, it is no longer available for
      // copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservatively assume the defs are
  // live-out, since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
        removeCopy(*DI);
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
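
MaybeDeadCopies above shows the full SmallSetVector lifecycle: copies become deletion candidates via insert(), are rescued with remove() when a later read of their def appears, and whatever survives to the end of the block is swept in insertion order. A schematic sketch, not the pass's actual bookkeeping:

#include "llvm/ADT/SmallSetVector.h"

struct Instr { bool Dead = false; };

// Candidate tracking in miniature: nominate, rescue, sweep.
void trackDeadCopies() {
  llvm::SmallSetVector<Instr *, 8> MaybeDead;
  Instr A, B;
  MaybeDead.insert(&A); // both copies start as deletion candidates
  MaybeDead.insert(&B);
  MaybeDead.remove(&B); // a use of B's def rescues it
  for (Instr *I : MaybeDead) // deterministic sweep of the leftovers
    I->Dead = true;
}
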
Example #5
File: LCSSA.cpp  Project: yxsamliu/llvm
/// For every instruction from the worklist, check to see if it has any uses
/// that are outside the current loop.  If so, insert LCSSA PHI nodes and
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                                    DominatorTree &DT, LoopInfo &LI) {
    SmallVector<Use *, 16> UsesToRewrite;
    SmallSetVector<PHINode *, 16> PHIsToRemove;
    PredIteratorCache PredCache;
    bool Changed = false;

    // Cache the Loop ExitBlocks across this loop.  We expect to visit many
    // instructions within the same loops; computing the exit blocks is
    // expensive, and we're not mutating the loop structure.
    SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;

    while (!Worklist.empty()) {
        UsesToRewrite.clear();

        Instruction *I = Worklist.pop_back_val();
        BasicBlock *InstBB = I->getParent();
        Loop *L = LI.getLoopFor(InstBB);
        if (!LoopExitBlocks.count(L))
            L->getExitBlocks(LoopExitBlocks[L]);
        assert(LoopExitBlocks.count(L));
        const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];

        if (ExitBlocks.empty())
            continue;

        // Tokens cannot be used in PHI nodes, so we skip over them.
        // We can run into tokens which are live out of a loop with catchswitch
        // instructions in Windows EH if the catchswitch has one catchpad which
        // is inside the loop and another which is not.
        if (I->getType()->isTokenTy())
            continue;

        for (Use &U : I->uses()) {
            Instruction *User = cast<Instruction>(U.getUser());
            BasicBlock *UserBB = User->getParent();
            if (PHINode *PN = dyn_cast<PHINode>(User))
                UserBB = PN->getIncomingBlock(U);

            if (InstBB != UserBB && !L->contains(UserBB))
                UsesToRewrite.push_back(&U);
        }

        // If there are no uses outside the loop, there is nothing to rewrite.
        if (UsesToRewrite.empty())
            continue;

        ++NumLCSSA; // We are applying the transformation

        // Invoke instructions are special in that their result value is not
        // available along their unwind edge. The code below tests to see whether
        // DomBB dominates the value, so adjust DomBB to the normal destination
        // block, which is effectively where the value is first usable.
        BasicBlock *DomBB = InstBB;
        if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
            DomBB = Inv->getNormalDest();

        DomTreeNode *DomNode = DT.getNode(DomBB);

        SmallVector<PHINode *, 16> AddedPHIs;
        SmallVector<PHINode *, 8> PostProcessPHIs;

        SmallVector<PHINode *, 4> InsertedPHIs;
        SSAUpdater SSAUpdate(&InsertedPHIs);
        SSAUpdate.Initialize(I->getType(), I->getName());

        // Insert the LCSSA PHIs into all of the exit blocks dominated by the
        // value, and add them to the PHI map.
        for (BasicBlock *ExitBB : ExitBlocks) {
            if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
                continue;

            // If we already inserted something for this BB, don't reprocess it.
            if (SSAUpdate.HasValueForBlock(ExitBB))
                continue;

            PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
                                          I->getName() + ".lcssa", &ExitBB->front());

            // Add inputs from inside the loop for this PHI.
            for (BasicBlock *Pred : PredCache.get(ExitBB)) {
                PN->addIncoming(I, Pred);

                // If the exit block has a predecessor not within the loop, arrange for
                // the incoming value use corresponding to that predecessor to be
                // rewritten in terms of a different LCSSA PHI.
                if (!L->contains(Pred))
                    UsesToRewrite.push_back(
                        &PN->getOperandUse(PN->getOperandNumForIncomingValue(
                                               PN->getNumIncomingValues() - 1)));
            }

            AddedPHIs.push_back(PN);

            // Remember that this phi makes the value alive in this block.
            SSAUpdate.AddAvailableValue(ExitBB, PN);

            // LoopSimplify might fail to simplify some loops (e.g. when indirect
            // branches are involved). In such situations, it might happen that an
            // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
            // create PHIs in such an exit block, we are also inserting PHIs into L2's
            // header. This could break LCSSA form for L2 because these inserted PHIs
            // can also have uses outside of L2. Remember all PHIs in such situations
            // so we can revisit them later on. FIXME: Remove this once indirectbr
            // support in LoopSimplify is improved.
            if (auto *OtherLoop = LI.getLoopFor(ExitBB))
                if (!L->contains(OtherLoop))
                    PostProcessPHIs.push_back(PN);
        }

        // Rewrite all uses outside the loop in terms of the new PHIs we just
        // inserted.
        for (Use *UseToRewrite : UsesToRewrite) {
            // If this use is in an exit block, rewrite to use the newly inserted PHI.
            // This is required for correctness because SSAUpdate doesn't handle uses
            // in the same block.  It assumes the PHI we inserted is at the end of the
            // block.
            Instruction *User = cast<Instruction>(UseToRewrite->getUser());
            BasicBlock *UserBB = User->getParent();
            if (PHINode *PN = dyn_cast<PHINode>(User))
                UserBB = PN->getIncomingBlock(*UseToRewrite);

            if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
                // Tell the VHs that the uses changed. This updates SCEV's caches.
                if (UseToRewrite->get()->hasValueHandle())
                    ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
                UseToRewrite->set(&UserBB->front());
                continue;
            }

            // Otherwise, do full PHI insertion.
            SSAUpdate.RewriteUse(*UseToRewrite);
        }

        // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
        // to post-process them to keep LCSSA form.
        for (PHINode *InsertedPN : InsertedPHIs) {
            if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
                if (!L->contains(OtherLoop))
                    PostProcessPHIs.push_back(InsertedPN);
        }

        // Post process PHI instructions that were inserted into another disjoint
        // loop and update their exits properly.
        for (auto *PostProcessPN : PostProcessPHIs) {
            if (PostProcessPN->use_empty())
                continue;

            // Reprocess each PHI instruction.
            Worklist.push_back(PostProcessPN);
        }

        // Keep track of PHI nodes that we want to remove because they did not have
        // any uses rewritten.
        for (PHINode *PN : AddedPHIs)
            if (PN->use_empty())
                PHIsToRemove.insert(PN);

        Changed = true;
    }
    // Remove PHI nodes that did not have any uses rewritten.
    for (PHINode *PN : PHIsToRemove) {
        assert (PN->use_empty() && "Trying to remove a phi with uses.");
        PN->eraseFromParent();
    }
    return Changed;
}
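
PHIsToRemove must be a set because the same unused PHI can be nominated on several worklist iterations, yet eraseFromParent may run only once per node; keeping the vector side also makes the deletion order reproducible. The shape of that final loop, sketched with a stand-in Phi type:

#include "llvm/ADT/SmallSetVector.h"
#include <cassert>

struct Phi {
  unsigned NumUses = 0;
  bool Erased = false;
  void eraseFromParent() { Erased = true; }
};

// Deferred, deduplicated deletion, as in the PHIsToRemove loop above.
void removeDeadPhis(llvm::SmallSetVector<Phi *, 16> &PHIsToRemove) {
  for (Phi *PN : PHIsToRemove) {
    assert(PN->NumUses == 0 && "Trying to remove a phi with uses.");
    PN->eraseFromParent(); // safe: each PHI appears at most once
  }
}
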
void AAEvaluator::runInternal(Function &F, AAResults &AA) {
  const DataLayout &DL = F.getParent()->getDataLayout();

  ++FunctionCount;

  SetVector<Value *> Pointers;
  SmallSetVector<CallBase *, 16> Calls;
  SetVector<Value *> Loads;
  SetVector<Value *> Stores;

  for (auto &I : F.args())
    if (I.getType()->isPointerTy())    // Add all pointer arguments.
      Pointers.insert(&I);

  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
    if (I->getType()->isPointerTy()) // Add all pointer instructions.
      Pointers.insert(&*I);
    if (EvalAAMD && isa<LoadInst>(&*I))
      Loads.insert(&*I);
    if (EvalAAMD && isa<StoreInst>(&*I))
      Stores.insert(&*I);
    Instruction &Inst = *I;
    if (auto *Call = dyn_cast<CallBase>(&Inst)) {
      Value *Callee = Call->getCalledValue();
      // Skip actual functions for direct function calls.
      if (!isa<Function>(Callee) && isInterestingPointer(Callee))
        Pointers.insert(Callee);
      // Consider formals.
      for (Use &DataOp : Call->data_ops())
        if (isInterestingPointer(DataOp))
          Pointers.insert(DataOp);
      Calls.insert(Call);
    } else {
      // Consider all operands.
      for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
           OI != OE; ++OI)
        if (isInterestingPointer(*OI))
          Pointers.insert(*OI);
    }
  }

  if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
      PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef)
    errs() << "Function: " << F.getName() << ": " << Pointers.size()
           << " pointers, " << Calls.size() << " call sites\n";

  // Iterate over the worklist and run the full (n^2)/2 disambiguations.
  for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
       I1 != E; ++I1) {
    auto I1Size = LocationSize::unknown();
    Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
    if (I1ElTy->isSized())
      I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));

    for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
      auto I2Size = LocationSize::unknown();
      Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
      if (I2ElTy->isSized())
        I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));

      AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
      switch (AR) {
      case NoAlias:
        PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
        ++NoAliasCount;
        break;
      case MayAlias:
        PrintResults(AR, PrintMayAlias, *I1, *I2, F.getParent());
        ++MayAliasCount;
        break;
      case PartialAlias:
        PrintResults(AR, PrintPartialAlias, *I1, *I2, F.getParent());
        ++PartialAliasCount;
        break;
      case MustAlias:
        PrintResults(AR, PrintMustAlias, *I1, *I2, F.getParent());
        ++MustAliasCount;
        break;
      }
    }
  }

  if (EvalAAMD) {
    // Iterate over all pairs of (load, store).
    for (Value *Load : Loads) {
      for (Value *Store : Stores) {
        AliasResult AR = AA.alias(MemoryLocation::get(cast<LoadInst>(Load)),
                                  MemoryLocation::get(cast<StoreInst>(Store)));
        switch (AR) {
        case NoAlias:
          PrintLoadStoreResults(AR, PrintNoAlias, Load, Store, F.getParent());
          ++NoAliasCount;
          break;
        case MayAlias:
          PrintLoadStoreResults(AR, PrintMayAlias, Load, Store, F.getParent());
          ++MayAliasCount;
          break;
        case PartialAlias:
          PrintLoadStoreResults(AR, PrintPartialAlias, Load, Store, F.getParent());
          ++PartialAliasCount;
          break;
        case MustAlias:
          PrintLoadStoreResults(AR, PrintMustAlias, Load, Store, F.getParent());
          ++MustAliasCount;
          break;
        }
      }
    }

    // Iterate over all pairs of (store, store).
    for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
         I1 != E; ++I1) {
      for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
        AliasResult AR = AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
                                  MemoryLocation::get(cast<StoreInst>(*I2)));
        switch (AR) {
        case NoAlias:
          PrintLoadStoreResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
          ++NoAliasCount;
          break;
        case MayAlias:
          PrintLoadStoreResults(AR, PrintMayAlias, *I1, *I2, F.getParent());
          ++MayAliasCount;
          break;
        case PartialAlias:
          PrintLoadStoreResults(AR, PrintPartialAlias, *I1, *I2, F.getParent());
          ++PartialAliasCount;
          break;
        case MustAlias:
          PrintLoadStoreResults(AR, PrintMustAlias, *I1, *I2, F.getParent());
          ++MustAliasCount;
          break;
        }
      }
    }
  }

  // Mod/ref alias analysis: compare all pairs of calls and values
  for (CallBase *Call : Calls) {
    for (auto Pointer : Pointers) {
      auto Size = LocationSize::unknown();
      Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
      if (ElTy->isSized())
        Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));

      switch (AA.getModRefInfo(Call, Pointer, Size)) {
      case ModRefInfo::NoModRef:
        PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
                           F.getParent());
        ++NoModRefCount;
        break;
      case ModRefInfo::Mod:
        PrintModRefResults("Just Mod", PrintMod, Call, Pointer, F.getParent());
        ++ModCount;
        break;
      case ModRefInfo::Ref:
        PrintModRefResults("Just Ref", PrintRef, Call, Pointer, F.getParent());
        ++RefCount;
        break;
      case ModRefInfo::ModRef:
        PrintModRefResults("Both ModRef", PrintModRef, Call, Pointer,
                           F.getParent());
        ++ModRefCount;
        break;
      case ModRefInfo::Must:
        PrintModRefResults("Must", PrintMust, Call, Pointer, F.getParent());
        ++MustCount;
        break;
      case ModRefInfo::MustMod:
        PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, Call, Pointer,
                           F.getParent());
        ++MustModCount;
        break;
      case ModRefInfo::MustRef:
        PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, Call, Pointer,
                           F.getParent());
        ++MustRefCount;
        break;
      case ModRefInfo::MustModRef:
        PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, Call,
                           Pointer, F.getParent());
        ++MustModRefCount;
        break;
      }
    }
  }

  // Mod/ref alias analysis: compare all pairs of calls
  for (CallBase *CallA : Calls) {
    for (CallBase *CallB : Calls) {
      if (CallA == CallB)
        continue;
      switch (AA.getModRefInfo(CallA, CallB)) {
      case ModRefInfo::NoModRef:
        PrintModRefResults("NoModRef", PrintNoModRef, CallA, CallB,
                           F.getParent());
        ++NoModRefCount;
        break;
      case ModRefInfo::Mod:
        PrintModRefResults("Just Mod", PrintMod, CallA, CallB, F.getParent());
        ++ModCount;
        break;
      case ModRefInfo::Ref:
        PrintModRefResults("Just Ref", PrintRef, CallA, CallB, F.getParent());
        ++RefCount;
        break;
      case ModRefInfo::ModRef:
        PrintModRefResults("Both ModRef", PrintModRef, CallA, CallB,
                           F.getParent());
        ++ModRefCount;
        break;
      case ModRefInfo::Must:
        PrintModRefResults("Must", PrintMust, CallA, CallB, F.getParent());
        ++MustCount;
        break;
      case ModRefInfo::MustMod:
        PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, CallA, CallB,
                           F.getParent());
        ++MustModCount;
        break;
      case ModRefInfo::MustRef:
        PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, CallA, CallB,
                           F.getParent());
        ++MustRefCount;
        break;
      case ModRefInfo::MustModRef:
        PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, CallA,
                           CallB, F.getParent());
        ++MustModRefCount;
        break;
      }
    }
  }
}
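
The quadratic pointer loop above works because SetVector iterators are plain vector iterators: the inner iterator ranges from begin() up to, but not including, the outer one, so every unordered pair is disambiguated exactly once. The same skeleton as a generic sketch:

#include "llvm/ADT/SetVector.h"

// Invoke Visit on each unordered pair of distinct elements; for n elements
// that is n*(n-1)/2 calls, matching the (n^2)/2 comment above.
template <typename T, typename Fn>
void forEachUnorderedPair(const llvm::SetVector<T> &Values, Fn Visit) {
  for (auto I1 = Values.begin(), E = Values.end(); I1 != E; ++I1)
    for (auto I2 = Values.begin(); I2 != I1; ++I2)
      Visit(*I1, *I2);
}
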
Example #7
LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
    LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
    CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR) {
  using Node = LazyCallGraph::Node;
  using Edge = LazyCallGraph::Edge;
  using SCC = LazyCallGraph::SCC;
  using RefSCC = LazyCallGraph::RefSCC;

  RefSCC &InitialRC = InitialC.getOuterRefSCC();
  SCC *C = &InitialC;
  RefSCC *RC = &InitialRC;
  Function &F = N.getFunction();

  // Walk the function body and build up the set of retained, promoted, and
  // demoted edges.
  SmallVector<Constant *, 16> Worklist;
  SmallPtrSet<Constant *, 16> Visited;
  SmallPtrSet<Node *, 16> RetainedEdges;
  SmallSetVector<Node *, 4> PromotedRefTargets;
  SmallSetVector<Node *, 4> DemotedCallTargets;

  // First walk the function and handle all called functions. We do this first
  // because if there is a single call edge, whether there are ref edges is
  // irrelevant.
  for (Instruction &I : instructions(F))
    if (auto CS = CallSite(&I))
      if (Function *Callee = CS.getCalledFunction())
        if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
          Node &CalleeN = *G.lookup(*Callee);
          Edge *E = N->lookup(CalleeN);
          // FIXME: We should really handle adding new calls. While it will
          // make downstream usage more complex, there is no fundamental
          // limitation and it will allow passes within the CGSCC to be a bit
          // more flexible in what transforms they can do. Until then, we
          // verify that new calls haven't been introduced.
          assert(E && "No function transformations should introduce *new* "
                      "call edges! Any new calls should be modeled as "
                      "promoted existing ref edges!");
          bool Inserted = RetainedEdges.insert(&CalleeN).second;
          (void)Inserted;
          assert(Inserted && "We should never visit a function twice.");
          if (!E->isCall())
            PromotedRefTargets.insert(&CalleeN);
        }

  // Now walk all references.
  for (Instruction &I : instructions(F))
    for (Value *Op : I.operand_values())
      if (auto *C = dyn_cast<Constant>(Op))
        if (Visited.insert(C).second)
          Worklist.push_back(C);

  auto VisitRef = [&](Function &Referee) {
    Node &RefereeN = *G.lookup(Referee);
    Edge *E = N->lookup(RefereeN);
    // FIXME: Similarly to new calls, we also currently preclude
    // introducing new references. See above for details.
    assert(E && "No function transformations should introduce *new* ref "
                "edges! Any new ref edges would require IPO which "
                "function passes aren't allowed to do!");
    bool Inserted = RetainedEdges.insert(&RefereeN).second;
    (void)Inserted;
    assert(Inserted && "We should never visit a function twice.");
    if (E->isCall())
      DemotedCallTargets.insert(&RefereeN);
  };
  LazyCallGraph::visitReferences(Worklist, Visited, VisitRef);

  // Include synthetic reference edges to known, defined lib functions.
  for (auto *F : G.getLibFunctions())
    // While the list of lib functions doesn't have repeats, don't re-visit
    // anything handled above.
    if (!Visited.count(F))
      VisitRef(*F);

  // First remove all of the edges that are no longer present in this function.
  // The first step makes these edges uniformly ref edges and accumulates them
  // into a separate data structure so removal doesn't invalidate anything.
  SmallVector<Node *, 4> DeadTargets;
  for (Edge &E : *N) {
    if (RetainedEdges.count(&E.getNode()))
      continue;

    SCC &TargetC = *G.lookupSCC(E.getNode());
    RefSCC &TargetRC = TargetC.getOuterRefSCC();
    if (&TargetRC == RC && E.isCall()) {
      if (C != &TargetC) {
        // For separate SCCs this is trivial.
        RC->switchTrivialInternalEdgeToRef(N, E.getNode());
      } else {
        // Now update the call graph.
        C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, E.getNode()),
                                   G, N, C, AM, UR);
      }
    }

    // Now that this is ready for actual removal, put it into our list.
    DeadTargets.push_back(&E.getNode());
  }
  // Remove the easy cases quickly and actually pull them out of our list.
  DeadTargets.erase(
      llvm::remove_if(DeadTargets,
                      [&](Node *TargetN) {
                        SCC &TargetC = *G.lookupSCC(*TargetN);
                        RefSCC &TargetRC = TargetC.getOuterRefSCC();

                        // We can't trivially remove internal targets, so skip
                        // those.
                        if (&TargetRC == RC)
                          return false;

                        RC->removeOutgoingEdge(N, *TargetN);
                        LLVM_DEBUG(dbgs() << "Deleting outgoing edge from '"
                                          << N << "' to '" << TargetN << "'\n");
                        return true;
                      }),
      DeadTargets.end());

  // Now do a batch removal of the internal ref edges left.
  auto NewRefSCCs = RC->removeInternalRefEdge(N, DeadTargets);
  if (!NewRefSCCs.empty()) {
    // The old RefSCC is dead, mark it as such.
    UR.InvalidatedRefSCCs.insert(RC);

    // Note that we don't bother to invalidate analyses as ref-edge
    // connectivity is not really observable in any way and is intended
    // exclusively to be used for ordering of transforms rather than for
    // analysis conclusions.

    // Update RC to the "bottom".
    assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
    RC = &C->getOuterRefSCC();
    assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");

    // The RC worklist is in reverse postorder, so we enqueue the new ones in
    // RPO except for the one which contains the source node as that is the
    // "bottom" we will continue processing in the bottom-up walk.
    assert(NewRefSCCs.front() == RC &&
           "New current RefSCC not first in the returned list!");
    for (RefSCC *NewRC : llvm::reverse(make_range(std::next(NewRefSCCs.begin()),
                                                  NewRefSCCs.end()))) {
      assert(NewRC != RC && "Should not encounter the current RefSCC further "
                            "in the postorder list of new RefSCCs.");
      UR.RCWorklist.insert(NewRC);
      LLVM_DEBUG(dbgs() << "Enqueuing a new RefSCC in the update worklist: "
                        << *NewRC << "\n");
    }
  }

  // Next demote all the call edges that are now ref edges. This helps make
  // the SCCs small which should minimize the work below as we don't want to
  // form cycles that this would break.
  for (Node *RefTarget : DemotedCallTargets) {
    SCC &TargetC = *G.lookupSCC(*RefTarget);
    RefSCC &TargetRC = TargetC.getOuterRefSCC();

    // The easy case is when the target RefSCC is not this RefSCC. This is
    // only supported when the target RefSCC is a child of this RefSCC.
    if (&TargetRC != RC) {
      assert(RC->isAncestorOf(TargetRC) &&
             "Cannot potentially form RefSCC cycles here!");
      RC->switchOutgoingEdgeToRef(N, *RefTarget);
      LLVM_DEBUG(dbgs() << "Switch outgoing call edge to a ref edge from '" << N
                        << "' to '" << *RefTarget << "'\n");
      continue;
    }

    // We are switching an internal call edge to a ref edge. This may split up
    // some SCCs.
    if (C != &TargetC) {
      // For separate SCCs this is trivial.
      RC->switchTrivialInternalEdgeToRef(N, *RefTarget);
      continue;
    }

    // Now update the call graph.
    C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N,
                               C, AM, UR);
  }

  // Now promote ref edges into call edges.
  for (Node *CallTarget : PromotedRefTargets) {
    SCC &TargetC = *G.lookupSCC(*CallTarget);
    RefSCC &TargetRC = TargetC.getOuterRefSCC();

    // The easy case is when the target RefSCC is not this RefSCC. This is
    // only supported when the target RefSCC is a child of this RefSCC.
    if (&TargetRC != RC) {
      assert(RC->isAncestorOf(TargetRC) &&
             "Cannot potentially form RefSCC cycles here!");
      RC->switchOutgoingEdgeToCall(N, *CallTarget);
      LLVM_DEBUG(dbgs() << "Switch outgoing ref edge to a call edge from '" << N
                        << "' to '" << *CallTarget << "'\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Switch an internal ref edge to a call edge from '"
                      << N << "' to '" << *CallTarget << "'\n");

    // Otherwise we are switching an internal ref edge to a call edge. This
    // may merge away some SCCs, and we add those to the UpdateResult. We also
    // need to make sure to update the worklist in the event SCCs have moved
    // before the current one in the post-order sequence.
    bool HasFunctionAnalysisProxy = false;
    auto InitialSCCIndex = RC->find(*C) - RC->begin();
    bool FormedCycle = RC->switchInternalEdgeToCall(
        N, *CallTarget, [&](ArrayRef<SCC *> MergedSCCs) {
          for (SCC *MergedC : MergedSCCs) {
            assert(MergedC != &TargetC && "Cannot merge away the target SCC!");

            HasFunctionAnalysisProxy |=
                AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(
                    *MergedC) != nullptr;

            // Mark that this SCC will no longer be valid.
            UR.InvalidatedSCCs.insert(MergedC);

            // FIXME: We should really do a 'clear' here to forcibly release
            // memory, but we don't have a good way of doing that and
            // preserving the function analyses.
            auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
            PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
            AM.invalidate(*MergedC, PA);
          }
        });

    // If we formed a cycle by creating this call, we need to update more data
    // structures.
    if (FormedCycle) {
      C = &TargetC;
      assert(G.lookupSCC(N) == C && "Failed to update current SCC!");

      // If one of the invalidated SCCs had a cached proxy to a function
      // analysis manager, we need to create a proxy in the new current SCC as
      // the invalidated SCCs had their functions moved.
      if (HasFunctionAnalysisProxy)
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G);

      // Any analyses cached for this SCC are no longer precise as the shape
      // has changed by introducing this cycle. However, we have taken care to
      // update the proxies so it remains valid.
      auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
      PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
      AM.invalidate(*C, PA);
    }
    auto NewSCCIndex = RC->find(*C) - RC->begin();
    // If we have actually moved an SCC to be topologically "below" the current
    // one due to merging, we will need to revisit the current SCC after
    // visiting those moved SCCs.
    //
    // It is critical that we *do not* revisit the current SCC unless we
    // actually move SCCs in the process of merging because otherwise we may
    // form a cycle where an SCC is split apart, merged, split, merged and so
    // on infinitely.
    if (InitialSCCIndex < NewSCCIndex) {
      // Put our current SCC back onto the worklist as we'll visit other SCCs
      // that are now definitively ordered prior to the current one in the
      // post-order sequence, and may end up observing more precise context to
      // optimize the current SCC.
      UR.CWorklist.insert(C);
      LLVM_DEBUG(dbgs() << "Enqueuing the existing SCC in the worklist: " << *C
                        << "\n");
      // Enqueue in reverse order as we pop off the back of the worklist.
      for (SCC &MovedC : llvm::reverse(make_range(RC->begin() + InitialSCCIndex,
                                                  RC->begin() + NewSCCIndex))) {
        UR.CWorklist.insert(&MovedC);
        LLVM_DEBUG(dbgs() << "Enqueuing a newly earlier in post-order SCC: "
                          << MovedC << "\n");
      }
    }
  }

  assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!");
  assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
  assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");

  // Record the current RefSCC and SCC for higher layers of the CGSCC pass
  // manager now that all the updates have been applied.
  if (RC != &InitialRC)
    UR.UpdatedRC = RC;
  if (C != &InitialC)
    UR.UpdatedC = C;

  return *C;
}
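
PromotedRefTargets and DemotedCallTargets are SmallSetVectors so that a function reached through several instructions is queued for promotion or demotion only once, and so the graph updates above run in a reproducible order. A toy version of the bucketing; EdgeInfo and the integer node IDs are invented for illustration:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSetVector.h"

struct EdgeInfo { int Target; bool WasCall; bool IsCall; };

// Bucket edges whose call/ref kind changed; repeats collapse in the sets,
// so each target is queued at most once.
void classifyEdges(llvm::ArrayRef<EdgeInfo> Edges,
                   llvm::SmallSetVector<int, 4> &PromotedRefTargets,
                   llvm::SmallSetVector<int, 4> &DemotedCallTargets) {
  for (const EdgeInfo &E : Edges) {
    if (!E.WasCall && E.IsCall)
      PromotedRefTargets.insert(E.Target);
    else if (E.WasCall && !E.IsCall)
      DemotedCallTargets.insert(E.Target);
  }
}
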
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
  // Check if the current basic block has a single predecessor.
  if (MBB->pred_size() != 1)
    return false;

  MachineBasicBlock *PredMBB = *MBB->pred_begin();
  MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
  if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
    return false;

  ++CompBr;
  do {
    --CompBr;
    if (guaranteesZeroRegInBlock(*CompBr, MBB))
      break;
  } while (CompBr != PredMBB->begin() && CompBr->isTerminator());

  // We've not found a CBZ/CBNZ; time to bail out.
  if (!guaranteesZeroRegInBlock(*CompBr, MBB))
    return false;

  unsigned TargetReg = CompBr->getOperand(0).getReg();
  if (!TargetReg)
    return false;
  assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
         "Expect physical register");

  // Remember all registers aliasing with TargetReg.
  SmallSetVector<unsigned, 8> TargetRegs;
  for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
    TargetRegs.insert(*AI);

  bool Changed = false;
  MachineBasicBlock::iterator LastChange = MBB->begin();
  unsigned SmallestDef = TargetReg;
  // Remove redundant Copy instructions unless TargetReg is modified.
  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
    MachineInstr *MI = &*I;
    ++I;
    if (MI->isCopy() && MI->getOperand(0).isReg() &&
        MI->getOperand(1).isReg()) {

      unsigned DefReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();

      if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
          !MRI->isReserved(DefReg) &&
          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
        DEBUG(dbgs() << "Remove redundant Copy : ");
        DEBUG((MI)->print(dbgs()));

        MI->eraseFromParent();
        Changed = true;
        LastChange = I;
        NumCopiesRemoved++;
        SmallestDef =
            TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
        continue;
      }
    }

    if (MI->modifiesRegister(TargetReg, TRI))
      break;
  }

  if (!Changed)
    return false;

  // Otherwise, we have to fix up the use-def chain, starting with the
  // CBZ/CBNZ. Conservatively mark as much as we can as live.
  CompBr->clearRegisterKills(SmallestDef, TRI);

  if (std::none_of(TargetRegs.begin(), TargetRegs.end(),
                   [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
    MBB->addLiveIn(TargetReg);

  // Clear any kills of TargetReg between CompBr and the last removed COPY.
  for (MachineInstr &MMI :
       make_range(MBB->begin()->getIterator(), LastChange->getIterator()))
    MMI.clearRegisterKills(SmallestDef, TRI);

  return true;
}
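
TargetRegs illustrates a third pattern: populate the set once (here, every register aliasing TargetReg) and then answer queries with STL-style algorithms over its vector side, as the std::none_of call above does. A hedged equivalent using LLVM's range helpers; needsLiveIn and its parameters are illustrative, not the pass's API:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSetVector.h"
#include "llvm/ADT/SmallVector.h"

// True if none of the collected registers is already a block live-in.
bool needsLiveIn(const llvm::SmallSetVector<unsigned, 8> &TargetRegs,
                 const llvm::SmallVectorImpl<unsigned> &BlockLiveIns) {
  return llvm::none_of(TargetRegs, [&](unsigned Reg) {
    return llvm::is_contained(BlockLiveIns, Reg);
  });
}
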
Example #9
/// After FromBB is tail duplicated into its predecessor blocks, the successors
/// have gained new predecessors. Update the PHI instructions in them
/// accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                  SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
         SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      MachineInstrBuilder MIB(*FromBB->getParent(), II);
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or an earlier pass have fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.

      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. In that case, avoid adding a dummy extra argument
          // to this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;

          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(SrcReg).addMBB(SrcBB);
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(Reg).addMBB(SrcBB);
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
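
PHI operands above come in (value, predecessor) pairs starting at operand 1, and the code deliberately reuses the pair being replaced to avoid expensive RemoveOperand calls. A hedged standard-C++ model of that slot-reuse bookkeeping (types and names are hypothetical):

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// A PHI's incoming list as (value, predecessor) pairs: overwrite FromBB's
// slot with the first replacement, append the rest, and erase the slot
// only if nothing replaced it.
using Incoming = std::vector<std::pair<std::string, std::string>>;

void updatePHI(Incoming &Phi, const std::string &FromBB,
               const Incoming &Replacements) {
  std::ptrdiff_t Idx = -1;
  for (std::size_t i = 0; i < Phi.size(); ++i)
    if (Phi[i].second == FromBB) { Idx = (std::ptrdiff_t)i; break; }

  bool Reused = false;
  for (const auto &Entry : Replacements) {
    if (Idx >= 0 && !Reused) {
      Phi[(std::size_t)Idx] = Entry; // reuse the old slot in place
      Reused = true;
    } else {
      Phi.push_back(Entry);          // extra predecessors get appended
    }
  }
  if (Idx >= 0 && !Reused)
    Phi.erase(Phi.begin() + Idx);    // the old entry had no replacement
}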
Example #10
/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
/// module.
bool ModuleLinker::linkModuleFlagsMetadata() {
  const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
  if (!SrcModFlags) return false;

  NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();

  // If the destination module doesn't have module flags yet, then just copy
  // over the source module's flags.
  if (DstModFlags->getNumOperands() == 0) {
    for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
      DstModFlags->addOperand(SrcModFlags->getOperand(I));

    return false;
  }

  bool HasErr = false;

  // Otherwise, we have to merge them based on their behaviors. First,
  // categorize all of the nodes in the modules' module flags. If an error or
  // warning occurs, then emit the appropriate message(s).
  DenseMap<MDString*, MDNode*> ErrorNode;
  DenseMap<MDString*, MDNode*> WarningNode;
  DenseMap<MDString*, MDNode*> OverrideNode;
  DenseMap<MDString*, SmallSetVector<MDNode*, 8> > RequireNodes;
  SmallSetVector<MDString*, 16> SeenIDs;

  HasErr |= categorizeModuleFlagNodes(SrcModFlags, ErrorNode, WarningNode,
                                      OverrideNode, RequireNodes, SeenIDs);
  HasErr |= categorizeModuleFlagNodes(DstModFlags, ErrorNode, WarningNode,
                                      OverrideNode, RequireNodes, SeenIDs);

  // Check that there isn't both an error and warning node for a flag.
  for (SmallSetVector<MDString*, 16>::iterator
         I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
    MDString *ID = *I;
    if (ErrorNode[ID] && WarningNode[ID])
      HasErr = emitError("linking module flags '" + ID->getString() +
                         "': IDs have conflicting behaviors");
  }

  // Early exit if we had an error.
  if (HasErr) return true;

  // Get the destination's module flags ready for new operands.
  DstModFlags->dropAllReferences();

  // Add all of the module flags to the destination module.
  DenseMap<MDString*, SmallVector<MDNode*, 4> > AddedNodes;
  for (SmallSetVector<MDString*, 16>::iterator
         I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
    MDString *ID = *I;
    if (OverrideNode[ID]) {
      DstModFlags->addOperand(OverrideNode[ID]);
      AddedNodes[ID].push_back(OverrideNode[ID]);
    } else if (ErrorNode[ID]) {
      DstModFlags->addOperand(ErrorNode[ID]);
      AddedNodes[ID].push_back(ErrorNode[ID]);
    } else if (WarningNode[ID]) {
      DstModFlags->addOperand(WarningNode[ID]);
      AddedNodes[ID].push_back(WarningNode[ID]);
    }

    for (SmallSetVector<MDNode*, 8>::iterator
           II = RequireNodes[ID].begin(), IE = RequireNodes[ID].end();
         II != IE; ++II)
      DstModFlags->addOperand(*II);
  }

  // Now check that all of the requirements have been satisfied.
  for (SmallSetVector<MDString*, 16>::iterator
         I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
    MDString *ID = *I;
    SmallSetVector<MDNode*, 8> &Set = RequireNodes[ID];

    for (SmallSetVector<MDNode*, 8>::iterator
           II = Set.begin(), IE = Set.end(); II != IE; ++II) {
      MDNode *Node = *II;
      assert(isa<MDNode>(Node->getOperand(2)) &&
             "Module flag's third operand must be an MDNode!");
      MDNode *Val = cast<MDNode>(Node->getOperand(2));

      MDString *ReqID = cast<MDString>(Val->getOperand(0));
      Value *ReqVal = Val->getOperand(1);

      bool HasValue = false;
      for (SmallVectorImpl<MDNode*>::iterator
             RI = AddedNodes[ReqID].begin(), RE = AddedNodes[ReqID].end();
           RI != RE; ++RI) {
        MDNode *ReqNode = *RI;
        if (ReqNode->getOperand(2) == ReqVal) {
          HasValue = true;
          break;
        }
      }

      if (!HasValue)
        HasErr = emitError("linking module flags '" + ReqID->getString() +
                           "': does not have the required value");
    }
  }

  return HasErr;
}
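
The final loop above verifies each "require" flag against what was actually merged into the destination module. A small sketch of that check, with flag values reduced to integers purely for illustration:

#include <map>
#include <string>
#include <vector>

// Each requirement names a flag ID and the value the merged module must
// carry for it.
struct Requirement { std::string ID; int Value; };

bool checkRequirements(const std::vector<Requirement> &Reqs,
                       const std::map<std::string, std::vector<int>> &Added) {
  for (const Requirement &R : Reqs) {
    bool HasValue = false;
    auto It = Added.find(R.ID);
    if (It != Added.end())
      for (int V : It->second)
        if (V == R.Value) { HasValue = true; break; }
    if (!HasValue)
      return false; // "does not have the required value"
  }
  return true;
}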
Example #11
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
  SmallVector<ReturnInst *, 16> Returns;
  SmallVector<InvokeInst *, 16> Invokes;
  SmallSetVector<LandingPadInst *, 16> LPads;

  // Look through the terminators of the basic blocks to find invokes.
  for (BasicBlock &BB : F)
    if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
      if (Function *Callee = II->getCalledFunction())
        if (Callee->getIntrinsicID() == Intrinsic::donothing) {
          // Remove the NOP invoke.
          BranchInst::Create(II->getNormalDest(), II);
          II->eraseFromParent();
          continue;
        }

      Invokes.push_back(II);
      LPads.insert(II->getUnwindDest()->getLandingPadInst());
    } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
      Returns.push_back(RI);
    }

  if (Invokes.empty())
    return false;

  NumInvokes += Invokes.size();

  lowerIncomingArguments(F);
  lowerAcrossUnwindEdges(F, Invokes);

  Value *FuncCtx =
      setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
  BasicBlock *EntryBB = &F.front();
  IRBuilder<> Builder(EntryBB->getTerminator());

  // Get a reference to the jump buffer.
  Value *JBufPtr =
      Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");

  // Save the frame pointer.
  Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
                                               "jbuf_fp_gep");

  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);

  // Save the stack pointer.
  Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
                                               "jbuf_sp_gep");

  Val = Builder.CreateCall(StackAddrFn, {}, "sp");
  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);

  // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
  Builder.CreateCall(BuiltinSetupDispatchFn, {});

  // Store a pointer to the function context so that the back-end will know
  // where to look for it.
  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
  Builder.CreateCall(FuncCtxFn, FuncCtxArg);

  // At this point, we are all set up, update the invoke instructions to mark
  // their call_site values.
  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
    insertCallSiteStore(Invokes[I], I + 1);

    ConstantInt *CallSiteNum =
        ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);

    // Record the call site value for the back end so it stays associated with
    // the invoke.
    CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
  }

  // Mark call instructions that aren't nounwind as no-action (call_site ==
  // -1). Skip the entry block, as prior to then, no function context has been
  // created for this function and any unexpected exceptions thrown will go
  // directly to the caller's context, which is what we want anyway, so no need
  // to do anything here.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB)
      if (I.mayThrow())
        insertCallSiteStore(&I, -1);
  }

  // Register the function context and make sure it's known to not throw.
  CallInst *Register =
      CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
  Register->setDoesNotThrow();

  // Following any allocas not in the entry block, update the saved SP in the
  // jmpbuf to the new value.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB) {
      if (auto *CI = dyn_cast<CallInst>(&I)) {
        if (CI->getCalledFunction() != StackRestoreFn)
          continue;
      } else if (!isa<AllocaInst>(&I)) {
        continue;
      }
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(&I);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }
  }

  // Finally, for any returns from this function, if this function contains an
  // invoke, add a call to unregister the function context.
  for (ReturnInst *Return : Returns)
    CallInst::Create(UnregisterFn, FuncCtx, "", Return);

  return true;
}
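
The call-site numbering scheme is the part worth isolating: invokes receive positive 1-based call_site values for the landing-pad dispatch, while may-throw plain calls outside the entry block are marked -1 (no action). A toy model under those assumptions, with the IR plumbing stripped away:

#include <vector>

// One record per call-like instruction, in layout order.
struct Site { bool IsInvoke; bool MayThrow; int CallSite = 0; };

void numberCallSites(std::vector<Site> &Sites) {
  int NextInvoke = 1;
  for (Site &S : Sites) {
    if (S.IsInvoke)
      S.CallSite = NextInvoke++; // invokes: 1..N, recorded for DWARF EH
    else if (S.MayThrow)
      S.CallSite = -1;           // no-action: unwind straight to the caller
  }
}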
Example #12
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;   // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;        // Def -> copies map
  DenseMap<unsigned, unsigned> SrcMap;              // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        unsigned SrcSrc = CopyMI->getOperand(1).getReg();
        if (!ReservedRegs.test(Def) &&
            (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP
          CopyMI->getOperand(1).setIsKill(false);
          MI->eraseFromParent();
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, it cannot be eliminated.
      CI = CopyMap.find(Src);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' was previously the source of another copy, then that
      // earlier copy's source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      SrcMap[Src] = Def;

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      CopyMap.erase(Reg);
      AvailCopyMap.erase(Reg);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CopyMap.erase(*AS);
        AvailCopyMap.erase(*AS);
      }

      // If 'Reg' is previously source of a copy, it is no longer available for
      // copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservatively assume the defs are live-out
  // since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
        (*DI)->eraseFromParent();
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
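
The heart of the block scan is an availability map from copy destinations to sources, consulted to cancel a copy that merely undoes an earlier one and invalidated when either register is clobbered. A simplified stand-in that ignores sub-registers and reserved registers:

#include <map>

using Reg = int;

struct CopyTracker {
  std::map<Reg, Reg> AvailCopy; // Def -> Src of a still-valid copy

  // True if "Def = COPY Src" only undoes an earlier "Src = COPY Def".
  bool isRedundant(Reg Def, Reg Src) {
    auto It = AvailCopy.find(Src);
    return It != AvailCopy.end() && It->second == Def;
  }
  void recordCopy(Reg Def, Reg Src) { AvailCopy[Def] = Src; }
  void clobber(Reg R) {         // any def of R kills entries using R
    AvailCopy.erase(R);
    for (auto It = AvailCopy.begin(); It != AvailCopy.end();)
      It = (It->second == R) ? AvailCopy.erase(It) : ++It;
  }
};

In the EAX/ECX example from the comments, recordCopy(ECX, EAX) makes a later isRedundant(EAX, ECX) return true, provided neither register was clobbered in between.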
Example #13
/// Remove dead stores to stack-allocated locations in the function end block.
/// Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
                             MemoryDependenceResults *MD,
                             const TargetLibraryInfo *TLI,
                             InstOverlapIntervalsTy &IOL,
                             DenseMap<Instruction*, size_t> *InstrOrdering) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallSetVector<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock &Entry = BB.getParent()->front();
  for (Instruction &I : Entry) {
    if (isa<AllocaInst>(&I))
      DeadStackObjects.insert(&I);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then it's leaked by this function anyways.
    else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
      DeadStackObjects.insert(&I);
  }

  // Treat byval or inalloca arguments the same, stores to them are dead at the
  // end of the function.
  for (Argument &AI : BB.getParent()->args())
    if (AI.hasByValOrInAllocaAttr())
      DeadStackObjects.insert(&AI);

  const DataLayout &DL = BB.getModule()->getDataLayout();

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
      // See through pointer-to-pointer bitcasts
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);

      // Stores to stack values are valid candidates for removal.
      bool AllDead = true;
      for (Value *Pointer : Pointers)
        if (!DeadStackObjects.count(Pointer)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = &*BBI;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (std::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(&*BBI, TLI)) {
      DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n  DEAD: "
                   << *&*BBI << '\n');
      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (isa<AllocaInst>(BBI)) {
      // Remove allocas from the list of dead stack objects; there can't be
      // any references before the definition.
      DeadStackObjects.remove(&*BBI);
      continue;
    }

    if (auto CS = CallSite(&*BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(&*BBI, TLI))
        DeadStackObjects.remove(&*BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      DeadStackObjects.remove_if([&](Value *I) {
        // See if the call site touches the value.
        ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI));

        return A == MRI_ModRef || A == MRI_Ref;
      });

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    // We can remove the dead stores, irrespective of the fence and its ordering
    // (release/acquire/seq_cst). Fences only constrain the ordering of
    // already visible stores; they do not make a store visible to other
    // threads. So skipping over a fence does not change whether a store is
    // dead.
    if (isa<FenceInst>(*BBI))
      continue;

    MemoryLocation LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load
        break;
      LoadedLoc = MemoryLocation::get(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = MemoryLocation::get(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = MemoryLocation::getForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}
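
The removability test above reduces to: every object the stored-to pointer may refer to must still be in the dead set. As a sketch, with stack objects abstracted to integers:

#include <set>
#include <vector>

using Obj = int;

// A store is removable only if all of its possible underlying objects are
// dead at this point in the backward scan.
bool storeIsDead(const std::vector<Obj> &PointsTo,
                 const std::set<Obj> &DeadStackObjects) {
  for (Obj O : PointsTo)
    if (!DeadStackObjects.count(O))
      return false; // one possibly-live target keeps the store alive
  return true;
}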
Example #14
/// Sort the blocks in RPO, taking special care to make sure that loops are
/// contiguous even in the case of split backedges.
///
/// TODO: Determine whether RPO is actually worthwhile, or whether we should
/// move to just a stable-topological-sort-based approach that would preserve
/// more of the original order.
static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
  // Note that we do our own RPO rather than using
  // "llvm/ADT/PostOrderIterator.h" because we want control over the order that
  // successors are visited in (see above). Also, we can sort the blocks in the
  // MachineFunction as we go.
  SmallPtrSet<MachineBasicBlock *, 16> Visited;
  SmallVector<POStackEntry, 16> Stack;

  MachineBasicBlock *EntryBlock = &*MF.begin();
  Visited.insert(EntryBlock);
  Stack.push_back(POStackEntry(EntryBlock, MF, MLI));

  for (;;) {
    POStackEntry &Entry = Stack.back();
    SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs;
    if (!Succs.empty()) {
      MachineBasicBlock *Succ = Succs.pop_back_val();
      if (Visited.insert(Succ).second)
        Stack.push_back(POStackEntry(Succ, MF, MLI));
      continue;
    }

    // Put the block in its position in the MachineFunction.
    MachineBasicBlock &MBB = *Entry.MBB;
    MBB.moveBefore(&*MF.begin());

    // Branch instructions may utilize a fallthrough, so update them if a
    // fallthrough has been added or removed.
    if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() &&
        !MBB.back().isBarrier())
      report_fatal_error(
          "Non-branch terminator with fallthrough cannot yet be rewritten");
    if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch())
      MBB.updateTerminator();

    Stack.pop_back();
    if (Stack.empty())
      break;
  }

  // Now that we've sorted the blocks in RPO, renumber them.
  MF.RenumberBlocks();

#ifndef NDEBUG
  SmallSetVector<MachineLoop *, 8> OnStack;

  // Insert a sentinel representing the degenerate loop that starts at the
  // function entry block and includes the entire function as a "loop" that
  // executes once.
  OnStack.insert(nullptr);

  for (auto &MBB : MF) {
    assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");

    MachineLoop *Loop = MLI.getLoopFor(&MBB);
    if (Loop && &MBB == Loop->getHeader()) {
      // Loop header. The loop predecessor should be sorted above, and the other
      // predecessors should be backedges below.
      for (auto Pred : MBB.predecessors())
        assert(
            (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
            "Loop header predecessors must be loop predecessors or backedges");
      assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
    } else {
      // Not a loop header. All predecessors should be sorted above.
      for (auto Pred : MBB.predecessors())
        assert(Pred->getNumber() < MBB.getNumber() &&
               "Non-loop-header predecessors should be topologically sorted");
      assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
             "Blocks must be nested in their loops");
    }
    while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
      OnStack.pop_back();
  }
  assert(OnStack.pop_back_val() == nullptr &&
         "The function entry block shouldn't actually be a loop header");
  assert(OnStack.empty() &&
         "Control flow stack pushes and pops should be balanced.");
#endif
}
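
The explicit-stack DFS above is worth seeing without the MachineFunction plumbing. A self-contained version over a plain adjacency list: blocks are emitted in post-order (a block only after its unvisited successors), and the reverse is the RPO that SortBlocks installs via moveBefore:

#include <algorithm>
#include <unordered_set>
#include <utility>
#include <vector>

std::vector<int> computeRPO(const std::vector<std::vector<int>> &Succs,
                            int Entry) {
  std::vector<int> PostOrder;
  std::unordered_set<int> Visited{Entry};
  // Each stack entry: (block, successors still to visit).
  std::vector<std::pair<int, std::vector<int>>> Stack{{Entry, Succs[Entry]}};
  while (!Stack.empty()) {
    auto &[BB, Rest] = Stack.back();
    if (!Rest.empty()) {
      int S = Rest.back();
      Rest.pop_back();
      if (Visited.insert(S).second)
        Stack.push_back({S, Succs[S]});
      continue;
    }
    PostOrder.push_back(BB); // all successors done: emit in post-order
    Stack.pop_back();
  }
  std::reverse(PostOrder.begin(), PostOrder.end());
  return PostOrder;
}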
Example #15
/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return a null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            const TargetLibraryInfo &TLI, bool &Speculative) {
    assert(F->getReturnType()->isPointerTy() &&
           "nonnull only meaningful on pointer types");
    Speculative = false;

    SmallSetVector<Value *, 8> FlowsToReturn;
    for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
            FlowsToReturn.insert(Ret->getReturnValue());

    for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
        Value *RetVal = FlowsToReturn[i];

        // If this value is locally known to be non-null, we're good
        if (isKnownNonNull(RetVal, &TLI))
            continue;

        // Otherwise, we need to look upwards since we can't make any local
        // conclusions.
        Instruction *RVI = dyn_cast<Instruction>(RetVal);
        if (!RVI)
            return false;
        switch (RVI->getOpcode()) {
        // Extend the analysis by looking upwards.
        case Instruction::BitCast:
        case Instruction::GetElementPtr:
        case Instruction::AddrSpaceCast:
            FlowsToReturn.insert(RVI->getOperand(0));
            continue;
        case Instruction::Select: {
            SelectInst *SI = cast<SelectInst>(RVI);
            FlowsToReturn.insert(SI->getTrueValue());
            FlowsToReturn.insert(SI->getFalseValue());
            continue;
        }
        case Instruction::PHI: {
            PHINode *PN = cast<PHINode>(RVI);
            for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
                FlowsToReturn.insert(PN->getIncomingValue(i));
            continue;
        }
        case Instruction::Call:
        case Instruction::Invoke: {
            CallSite CS(RVI);
            Function *Callee = CS.getCalledFunction();
            // A call to a node within the SCC is assumed to return null until
            // proven otherwise
            if (Callee && SCCNodes.count(Callee)) {
                Speculative = true;
                continue;
            }
            return false;
        }
        default:
            return false; // Unknown source, may be null
        }
        llvm_unreachable("should have either continued or returned");
    }

    return true;
}
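
FlowsToReturn is iterated by index because the set grows during the walk; iterators into it would be invalidated. The same idiom, reduced to a standard-C++ worklist over a hypothetical dataflow node type:

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Inputs; bool KnownNonNull = false; };

bool allNonNull(std::vector<Node *> Roots) {
  std::vector<Node *> Work;
  std::unordered_set<Node *> Seen;
  auto Push = [&](Node *N) { if (Seen.insert(N).second) Work.push_back(N); };
  for (Node *R : Roots)
    Push(R);
  for (size_t i = 0; i != Work.size(); ++i) { // size() re-read: list may grow
    Node *N = Work[i];
    if (N->KnownNonNull)
      continue;
    if (N->Inputs.empty())
      return false;   // unknown source, may be null
    for (Node *In : N->Inputs)
      Push(In);       // extend the analysis by looking upwards
  }
  return true;
}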
Example #16
File: IRMover.cpp Project: yxsamliu/llvm
/// Merge the linker flags in Src into the Dest module.
Error IRLinker::linkModuleFlagsMetadata() {
  // If the source module has no module flags, we are done.
  const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
  if (!SrcModFlags)
    return Error::success();

  // If the destination module doesn't have module flags yet, then just copy
  // over the source module's flags.
  NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata();
  if (DstModFlags->getNumOperands() == 0) {
    for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
      DstModFlags->addOperand(SrcModFlags->getOperand(I));

    return Error::success();
  }

  // First build a map of the existing module flags and requirements.
  DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
  SmallSetVector<MDNode *, 16> Requirements;
  for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = DstModFlags->getOperand(I);
    ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
    MDString *ID = cast<MDString>(Op->getOperand(1));

    if (Behavior->getZExtValue() == Module::Require) {
      Requirements.insert(cast<MDNode>(Op->getOperand(2)));
    } else {
      Flags[ID] = std::make_pair(Op, I);
    }
  }

  // Merge in the flags from the source module, and also collect its set of
  // requirements.
  for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
    MDNode *SrcOp = SrcModFlags->getOperand(I);
    ConstantInt *SrcBehavior =
        mdconst::extract<ConstantInt>(SrcOp->getOperand(0));
    MDString *ID = cast<MDString>(SrcOp->getOperand(1));
    MDNode *DstOp;
    unsigned DstIndex;
    std::tie(DstOp, DstIndex) = Flags.lookup(ID);
    unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();

    // If this is a requirement, add it and continue.
    if (SrcBehaviorValue == Module::Require) {
      // If the destination module does not already have this requirement, add
      // it.
      if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
        DstModFlags->addOperand(SrcOp);
      }
      continue;
    }

    // If there is no existing flag with this ID, just add it.
    if (!DstOp) {
      Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
      DstModFlags->addOperand(SrcOp);
      continue;
    }

    // Otherwise, perform a merge.
    ConstantInt *DstBehavior =
        mdconst::extract<ConstantInt>(DstOp->getOperand(0));
    unsigned DstBehaviorValue = DstBehavior->getZExtValue();

    // If either flag has override behavior, handle it first.
    if (DstBehaviorValue == Module::Override) {
      // Diagnose inconsistent flags which both have override behavior.
      if (SrcBehaviorValue == Module::Override &&
          SrcOp->getOperand(2) != DstOp->getOperand(2))
        return stringErr("linking module flags '" + ID->getString() +
                         "': IDs have conflicting override values");
      continue;
    } else if (SrcBehaviorValue == Module::Override) {
      // Update the destination flag to that of the source.
      DstModFlags->setOperand(DstIndex, SrcOp);
      Flags[ID].first = SrcOp;
      continue;
    }

    // Diagnose inconsistent merge behavior types.
    if (SrcBehaviorValue != DstBehaviorValue)
      return stringErr("linking module flags '" + ID->getString() +
                       "': IDs have conflicting behaviors");

    auto replaceDstValue = [&](MDNode *New) {
      Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
      MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
      DstModFlags->setOperand(DstIndex, Flag);
      Flags[ID].first = Flag;
    };

    // Perform the merge for standard behavior types.
    switch (SrcBehaviorValue) {
    case Module::Require:
    case Module::Override:
      llvm_unreachable("not possible");
    case Module::Error: {
      // Emit an error if the values differ.
      if (SrcOp->getOperand(2) != DstOp->getOperand(2))
        return stringErr("linking module flags '" + ID->getString() +
                         "': IDs have conflicting values");
      continue;
    }
    case Module::Warning: {
      // Emit a warning if the values differ.
      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
        emitWarning("linking module flags '" + ID->getString() +
                    "': IDs have conflicting values");
      }
      continue;
    }
    case Module::Append: {
      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
      SmallVector<Metadata *, 8> MDs;
      MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
      MDs.append(DstValue->op_begin(), DstValue->op_end());
      MDs.append(SrcValue->op_begin(), SrcValue->op_end());

      replaceDstValue(MDNode::get(DstM.getContext(), MDs));
      break;
    }
    case Module::AppendUnique: {
      SmallSetVector<Metadata *, 16> Elts;
      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
      Elts.insert(DstValue->op_begin(), DstValue->op_end());
      Elts.insert(SrcValue->op_begin(), SrcValue->op_end());

      replaceDstValue(MDNode::get(DstM.getContext(),
                                  makeArrayRef(Elts.begin(), Elts.end())));
      break;
    }
    }
  }

  // Check all of the requirements.
  for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
    MDNode *Requirement = Requirements[I];
    MDString *Flag = cast<MDString>(Requirement->getOperand(0));
    Metadata *ReqValue = Requirement->getOperand(1);

    MDNode *Op = Flags[Flag].first;
    if (!Op || Op->getOperand(2) != ReqValue)
      return stringErr("linking module flags '" + Flag->getString() +
                       "': does not have the required value");
  }
  return Error::success();
}
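
The AppendUnique branch above concatenates two operand lists while keeping only the first occurrence of each element, in encounter order; that is exactly what the SmallSetVector buys there. The same merge on plain strings:

#include <string>
#include <unordered_set>
#include <vector>

std::vector<std::string>
appendUnique(const std::vector<std::string> &Dst,
             const std::vector<std::string> &Src) {
  std::vector<std::string> Out;
  std::unordered_set<std::string> Seen;
  for (const auto *List : {&Dst, &Src})   // destination first, then source
    for (const std::string &E : *List)
      if (Seen.insert(E).second)          // keep only the first occurrence
        Out.push_back(E);
  return Out;
}

For instance, appendUnique({"a","b"}, {"b","c"}) yields {"a","b","c"}.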
Example #17
void MCObjectDisassembler::buildCFG(MCModule *Module) {
  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
  BBInfoByAddrTy BBInfos;
  AddressSetTy Splits;
  AddressSetTy Calls;

  error_code ec;
  for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
       SI != SE; SI.increment(ec)) {
    if (ec)
      break;
    SymbolRef::Type SymType;
    SI->getType(SymType);
    if (SymType == SymbolRef::ST_Function) {
      uint64_t SymAddr;
      SI->getAddress(SymAddr);
      SymAddr = getEffectiveLoadAddr(SymAddr);
      Calls.push_back(SymAddr);
      Splits.push_back(SymAddr);
    }
  }

  assert(Module->func_begin() == Module->func_end()
         && "Module already has a CFG!");

  // First, determine the basic block boundaries and call targets.
  for (MCModule::atom_iterator AI = Module->atom_begin(),
                               AE = Module->atom_end();
       AI != AE; ++AI) {
    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
    if (!TA) continue;
    Calls.push_back(TA->getBeginAddr());
    BBInfos[TA->getBeginAddr()].Atom = TA;
    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
         II != IE; ++II) {
      if (MIA.isTerminator(II->Inst))
        Splits.push_back(II->Address + II->Size);
      uint64_t Target;
      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
        if (MIA.isCall(II->Inst))
          Calls.push_back(Target);
        Splits.push_back(Target);
      }
    }
  }

  RemoveDupsFromAddressVector(Splits);
  RemoveDupsFromAddressVector(Calls);

  // Split text atoms into basic block atoms.
  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
       SI != SE; ++SI) {
    MCAtom *A = Module->findAtomContaining(*SI);
    if (!A) continue;
    MCTextAtom *TA = cast<MCTextAtom>(A);
    if (TA->getBeginAddr() == *SI)
      continue;
    MCTextAtom *NewAtom = TA->split(*SI);
    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
    StringRef BBName = TA->getName();
    BBName = BBName.substr(0, BBName.find_last_of(':'));
    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
  }

  // Compute succs/preds.
  for (MCModule::atom_iterator AI = Module->atom_begin(),
                               AE = Module->atom_end();
                               AI != AE; ++AI) {
    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
    if (!TA) continue;
    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
    const MCDecodedInst &LI = TA->back();
    if (MIA.isBranch(LI.Inst)) {
      uint64_t Target;
      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
        CurBB.addSucc(BBInfos[Target]);
      if (MIA.isConditionalBranch(LI.Inst))
        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
    } else if (!MIA.isTerminator(LI.Inst))
      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
  }


  // Create functions and basic blocks.
  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
       CI != CE; ++CI) {
    BBInfo &BBI = BBInfos[*CI];
    if (!BBI.Atom) continue;

    MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());

    // Create MCBBs.
    SmallSetVector<BBInfo*, 16> Worklist;
    Worklist.insert(&BBI);
    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
      BBInfo *BBI = Worklist[wi];
      if (!BBI->Atom)
        continue;
      BBI->BB = &MCFN.createBlock(*BBI->Atom);
      // Add all predecessors and successors to the worklist.
      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
                                 SI != SE; ++SI)
        Worklist.insert(*SI);
      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
                                 PI != PE; ++PI)
        Worklist.insert(*PI);
    }

    // Set preds/succs.
    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
      BBInfo *BBI = Worklist[wi];
      MCBasicBlock *MCBB = BBI->BB;
      if (!MCBB)
        continue;
      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
           SI != SE; ++SI)
        if ((*SI)->BB)
          MCBB->addSuccessor((*SI)->BB);
      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
           PI != PE; ++PI)
        if ((*PI)->BB)
          MCBB->addPredecessor((*PI)->BB);
    }
  }
}
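
RemoveDupsFromAddressVector is called but not shown above. A plausible implementation, given that the split loop wants each boundary address once and in ascending order, is the usual sort-then-unique canonicalization (an assumption, not the project's actual code):

#include <algorithm>
#include <cstdint>
#include <vector>

static void removeDupsFromAddressVector(std::vector<uint64_t> &V) {
  std::sort(V.begin(), V.end());                    // ascending addresses
  V.erase(std::unique(V.begin(), V.end()), V.end()); // drop duplicates
}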
Example #18
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a super-linear fashion.
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events. We can use the
    // manager we get here for looking up results for functions other than this
    // node however because those functions aren't going to be mutated by this
    // pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
            .getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
        continue;

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;

      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;

      if (!InlineFunction(CS, IFI)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ORE.emit([&]() {
        bool AlwaysInline = OIC->isAlways();
        StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
        OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
        R << NV("Callee", &Callee) << " inlined into ";
        R << NV("Caller", &F);
        if (AlwaysInline)
          R << " with cost=always";
        else {
          R << " with cost=" << NV("Cost", OIC->getCost());
          R << " (threshold=" << NV("Threshold", OIC->getThreshold());
          R << ")";
        }
        return R;
      });

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
        ImportedFunctionsStats->recordInline(F, Callee);

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot put cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain. While in some cases these edges are direct calls inside the
    // callee, they have to be modeled in the inliner as reference edges as
    // there may be a reference edge anywhere along the chain from the current
    // caller to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call edge
    // below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight
    // over-approximation of the possible inlining decisions that must be avoided,
    // but is relatively efficient to store.
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}
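
InlineHistoryIncludes is the guard that stops the worklist from re-inlining a callee along the chain of inlines that produced a call site. Isolated as a sketch: each history entry stores (callee, parent index), so the check is a walk up parent links, with -1 as the terminating sentinel:

#include <cstddef>
#include <utility>
#include <vector>

using Fn = const void *; // stands in for Function *

bool inlineHistoryIncludes(Fn Callee, int HistoryID,
                           const std::vector<std::pair<Fn, int>> &History) {
  while (HistoryID != -1) {
    const auto &Entry = History[(std::size_t)HistoryID];
    if (Entry.first == Callee)
      return true;            // inlining again would start a cycle
    HistoryID = Entry.second; // follow the inline that created this site
  }
  return false;
}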
Example #19
/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
/// known.  Otherwise remember the dependency for later.
///
/// @param SVIIter SibValues entry to propagate.
/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
                                          VNInfo *VNI) {
  SibValueMap::value_type *SVI = &*SVIIter;

  // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
  TinyPtrVector<VNInfo*> FirstDeps;
  if (VNI) {
    FirstDeps.push_back(VNI);
    SVI->second.Deps.push_back(VNI);
  }

  // Has the value been completely determined yet?  If not, defer propagation.
  if (!SVI->second.hasDef())
    return;

  // Work list of values to propagate.
  SmallSetVector<SibValueMap::value_type *, 8> WorkList;
  WorkList.insert(SVI);

  do {
    SVI = WorkList.pop_back_val();
    TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
    VNI = nullptr;

    SibValueInfo &SV = SVI->second;
    if (!SV.SpillMBB)
      SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);

    DEBUG(dbgs() << "  prop to " << Deps->size() << ": "
                 << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);

    assert(SV.hasDef() && "Propagating undefined value");

    // Should this value be propagated as a preferred spill candidate?  We don't
    // propagate values of registers that are about to spill.
    bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
    unsigned SpillDepth = ~0u;

    for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(),
         DepE = Deps->end(); DepI != DepE; ++DepI) {
      SibValueMap::iterator DepSVI = SibValues.find(*DepI);
      assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
      SibValueInfo &DepSV = DepSVI->second;
      if (!DepSV.SpillMBB)
        DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);

      bool Changed = false;

      // Propagate defining instruction.
      if (!DepSV.hasDef()) {
        Changed = true;
        DepSV.DefMI = SV.DefMI;
        DepSV.DefByOrigPHI = SV.DefByOrigPHI;
      }

      // Propagate AllDefsAreReloads.  For PHI values, this computes an AND of
      // all predecessors.
      if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
        Changed = true;
        DepSV.AllDefsAreReloads = false;
      }

      // Propagate best spill value.
      if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
        if (SV.SpillMBB == DepSV.SpillMBB) {
          // DepSV is in the same block.  Hoist when dominated.
          if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
            // This is an alternative def earlier in the same MBB.
            // Hoist the spill as far as possible in SpillMBB. This can ease
            // register pressure:
            //
            //   x = def
            //   y = use x
            //   s = copy x
            //
            // Hoisting the spill of s to immediately after the def removes the
            // interference between x and y:
            //
            //   x = def
            //   spill x
            //   y = use x<kill>
            //
            // This hoist only helps when the DepSV copy kills its source.
            Changed = true;
            DepSV.SpillReg = SV.SpillReg;
            DepSV.SpillVNI = SV.SpillVNI;
            DepSV.SpillMBB = SV.SpillMBB;
          }
        } else {
          // DepSV is in a different block.
          if (SpillDepth == ~0u)
            SpillDepth = Loops.getLoopDepth(SV.SpillMBB);

          // Also hoist spills to blocks with smaller loop depth, but make sure
          // that the new value dominates.  Non-phi dependents are always
          // dominated; phis need checking.

          const BranchProbability MarginProb(4, 5); // 80%
          // Hoist a spill to the outer loop if there are multiple dependents
          // (it can be beneficial when more than one dependent is hoisted) or
          // if DepSV (the hoisting source) is hotter than SV (the hoisting
          // destination); we add an 80% margin to bias a little towards
          // loop depth.
          bool HoistCondition =
            (MBFI.getBlockFreq(DepSV.SpillMBB) >=
             (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
            Deps->size() > 1;

          if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
              HoistCondition &&
              (!DepSVI->first->isPHIDef() ||
               MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
            Changed = true;
            DepSV.SpillReg = SV.SpillReg;
            DepSV.SpillVNI = SV.SpillVNI;
            DepSV.SpillMBB = SV.SpillMBB;
          }
        }
      }

      if (!Changed)
        continue;

      // Something changed in DepSVI. Propagate to dependents.
      WorkList.insert(&*DepSVI);

      DEBUG(dbgs() << "  update " << DepSVI->first->id << '@'
            << DepSVI->first->def << " to:\t" << DepSV);
    }
  } while (!WorkList.empty());
}
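// A minimal sketch of the hoisting condition used above, factored into a
// hypothetical standalone predicate (shouldHoistSpill is not LLVM API, and raw
// integer frequencies stand in for the BlockFrequency * BranchProbability
// scaling the real code performs): hoist when several dependents share the
// spill, or when the hoisting source is at least 80% as hot as the hoisting
// destination.
static bool shouldHoistSpill(uint64_t DepFreq, uint64_t DestFreq,
                             unsigned NumDeps) {
  // Multiple dependents can all profit from a single hoisted spill.
  if (NumDeps > 1)
    return true;
  // DepFreq >= DestFreq * 4/5, written without division.
  return DepFreq * 5 >= DestFreq * 4;
}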
示例#20
0
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
  if (!A || !B)
    return nullptr;

  if (A == B)
    return A;

  // For struct-path aware TBAA, we use the access type of the tag.
  bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
  if (StructPath) {
    A = cast_or_null<MDNode>(A->getOperand(1));
    if (!A)
      return nullptr;
    B = cast_or_null<MDNode>(B->getOperand(1));
    if (!B)
      return nullptr;
  }

  SmallSetVector<MDNode *, 4> PathA;
  MDNode *T = A;
  while (T) {
    if (PathA.count(T))
      report_fatal_error("Cycle found in TBAA metadata.");
    PathA.insert(T);
    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
                                 : nullptr;
  }

  SmallSetVector<MDNode *, 4> PathB;
  T = B;
  while (T) {
    if (PathB.count(T))
      report_fatal_error("Cycle found in TBAA metadata.");
    PathB.insert(T);
    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
                                 : nullptr;
  }

  int IA = PathA.size() - 1;
  int IB = PathB.size() - 1;

  MDNode *Ret = nullptr;
  while (IA >= 0 && IB >= 0) {
    if (PathA[IA] == PathB[IB])
      Ret = PathA[IA];
    else
      break;
    --IA;
    --IB;
  }
  if (!StructPath)
    return Ret;

  if (!Ret)
    return nullptr;
  // We need to convert from a type node to a tag node.
  Type *Int64 = IntegerType::get(A->getContext(), 64);
  Metadata *Ops[3] = {Ret, Ret,
                      ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
  return MDNode::get(A->getContext(), Ops);
}
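// A minimal sketch of the core step above, as a hypothetical helper: the most
// generic TBAA node is the deepest common ancestor of the two type trees,
// i.e. the longest common suffix of the collected leaf-to-root paths (the
// roots are stored last, so we walk the vectors backwards).
static MDNode *commonPathRoot(const SmallSetVector<MDNode *, 4> &PathA,
                              const SmallSetVector<MDNode *, 4> &PathB) {
  MDNode *Ret = nullptr;
  int IA = PathA.size() - 1, IB = PathB.size() - 1;
  // Walk from the roots towards the leaves; stop at the first disagreement.
  while (IA >= 0 && IB >= 0 && PathA[IA] == PathB[IB]) {
    Ret = PathA[IA];
    --IA;
    --IB;
  }
  return Ret;
}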
示例#21
0
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallSetVector<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
    if (isa<AllocaInst>(I))
      DeadStackObjects.insert(I);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then the object is leaked by this function anyway.
    else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
      DeadStackObjects.insert(I);
  }

  // Treat byval or inalloca arguments the same way: stores to them are dead at
  // the end of the function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValOrInAllocaAttr())
      DeadStackObjects.insert(AI);

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);

      // Stores to stack values are valid candidates for removal.
      bool AllDead = true;
      for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
           E = Pointers.end(); I != E; ++I)
        if (!DeadStackObjects.count(*I)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (std::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI, TLI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (isa<AllocaInst>(BBI)) {
      // Remove allocas from the list of dead stack objects; there can't be
      // any references before the definition.
      DeadStackObjects.remove(BBI);
      continue;
    }

    if (CallSite CS = cast<Value>(BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(BBI, TLI))
        DeadStackObjects.remove(BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      DeadStackObjects.remove_if([&](Value *I) {
        // See if the call site touches the value.
        AliasAnalysis::ModRefResult A =
            AA->getModRefInfo(CS, I, getPointerSize(I, *AA));

        return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
      });

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    AliasAnalysis::Location LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load
        break;
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadStackObjects set that are loaded, as
    // this makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}
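// A minimal sketch of the removability test above, factored into a
// hypothetical helper: a store is dead at the end of the function only if
// *every* underlying object of its pointer operand is a known-dead stack
// object.
static bool allPointToDeadObjects(
    const SmallVectorImpl<Value *> &Pointers,
    const SmallSetVector<Value *, 16> &DeadStackObjects) {
  for (Value *P : Pointers)
    if (!DeadStackObjects.count(P))
      return false;
  return true;
}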
示例#22
0
/// setupEntryBlockAndCallSites - Set up the entry block by creating and
/// filling the function context and marking the call sites with the
/// appropriate values. These values are used by the DWARF EH emitter.
bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
  SmallVector<ReturnInst*,     16> Returns;
  SmallVector<InvokeInst*,     16> Invokes;
  SmallSetVector<LandingPadInst*, 16> LPads;

  // Look through the terminators of the basic blocks to find invokes.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      Invokes.push_back(II);
      LPads.insert(II->getUnwindDest()->getLandingPadInst());
    } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
      Returns.push_back(RI);
    }

  if (Invokes.empty()) return false;

  NumInvokes += Invokes.size();

  lowerIncomingArguments(F);
  lowerAcrossUnwindEdges(F, Invokes);

  Value *FuncCtx =
    setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
  BasicBlock *EntryBB = F.begin();
  Type *Int32Ty = Type::getInt32Ty(F.getContext());

  Value *Idxs[2] = {
    ConstantInt::get(Int32Ty, 0), 0
  };

  // Get a reference to the jump buffer.
  Idxs[1] = ConstantInt::get(Int32Ty, 5);
  Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep",
                                             EntryBB->getTerminator());

  // Save the frame pointer.
  Idxs[1] = ConstantInt::get(Int32Ty, 0);
  Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
                                              EntryBB->getTerminator());

  Value *Val = CallInst::Create(FrameAddrFn,
                                ConstantInt::get(Int32Ty, 0),
                                "fp",
                                EntryBB->getTerminator());
  new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());

  // Save the stack pointer.
  Idxs[1] = ConstantInt::get(Int32Ty, 2);
  Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
                                              EntryBB->getTerminator());

  Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
  new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());

  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
  Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr,
                                      Type::getInt8PtrTy(F.getContext()), "",
                                      EntryBB->getTerminator());
  CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator());

  // Store a pointer to the function context so that the back-end will know
  // where to look for it.
  Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx,
                                       Type::getInt8PtrTy(F.getContext()), "",
                                       EntryBB->getTerminator());
  CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator());

  // At this point, we are all set up; update the invoke instructions to mark
  // their call_site values.
  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
    insertCallSiteStore(Invokes[I], I + 1);

    ConstantInt *CallSiteNum =
      ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);

    // Record the call site value for the back end so it stays associated with
    // the invoke.
    CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
  }

  // Mark call instructions that aren't nounwind as no-action (call_site ==
  // -1). Skip the entry block: before the function context has been created,
  // any unexpected exceptions thrown will go directly to the caller's context,
  // which is what we want anyway, so there is no need to do anything there.
  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
    for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
      if (CallInst *CI = dyn_cast<CallInst>(I)) {
        if (!CI->doesNotThrow())
          insertCallSiteStore(CI, -1);
      } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
        insertCallSiteStore(RI, -1);
      }

  // Register the function context and make sure it's known not to throw.
  CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "",
                                        EntryBB->getTerminator());
  Register->setDoesNotThrow();

  // Following any allocas not in the entry block, update the saved SP in the
  // jmpbuf to the new value.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    if (BB == F.begin())
      continue;
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (CallInst *CI = dyn_cast<CallInst>(I)) {
        if (CI->getCalledFunction() != StackRestoreFn)
          continue;
      } else if (!isa<AllocaInst>(I)) {
        continue;
      }
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(I);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }
  }

  // Finally, for any returns from this function, if this function contains an
  // invoke, add a call to unregister the function context.
  for (unsigned I = 0, E = Returns.size(); I != E; ++I)
    CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);

  return true;
}
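// A rough sketch (an assumption, not code from this pass) of the function
// context layout the GEP indices above rely on: field 5 is the jump buffer,
// and within it slot 0 receives the frame pointer and slot 2 the stack
// pointer, matching the builtin setjmp convention. The pass itself builds the
// equivalent StructType; this struct is purely illustrative.
struct SjLjFunctionContext {
  SjLjFunctionContext *Prev; // field 0: runtime-managed list link
  int32_t CallSite;          // field 1: call_site value read by the EH emitter
  int32_t Data[4];           // field 2: filled in by the personality routine
  void *Personality;         // field 3: personality function pointer
  void *LSDA;                // field 4: language-specific data area
  void *JBuf[5];             // field 5: jump buffer ([0]=fp, [2]=sp)
};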
示例#23
0
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                     const Instruction *StartingInst,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst *> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo) {
  assert(NameSuffix && "NameSuffix cannot be null!");

  ValueMapTypeRemapper *TypeMapper = nullptr;
  ValueMaterializer *Materializer = nullptr;

#ifndef NDEBUG
  // If the cloning starts at the beginning of the function, verify that
  // the function arguments are mapped.
  if (!StartingInst)
    for (const Argument &II : OldFunc->args())
      assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            NameSuffix, CodeInfo);
  const BasicBlock *StartingBB;
  if (StartingInst)
    StartingBB = StartingInst->getParent();
  else {
    StartingBB = &OldFunc->getEntryBlock();
    StartingInst = &StartingBB->front();
  }

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
  }
  
  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (const BasicBlock &BI : *OldFunc) {
    Value *V = VMap.lookup(&BI);
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (!NewBB) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
      // PHI nodes may have been remapped to non-PHI nodes by the caller or
      // during the cloning process.
      if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        if (isa<PHINode>(VMap[PN]))
          PHIToResolve.push_back(PN);
        else
          break;
      } else {
        break;
      }
    }

    // Finally, remap the terminator instructions, as those can't be remapped
    // until all BBs are mapped.
    RemapInstruction(NewBB->getTerminator(), VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer);
  }
  
  // Defer PHI resolution until the rest of the function is resolved; PHI
  // resolution requires the CFG to be up-to-date.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap.lookup(PN->getIncomingBlock(pred));
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          Value *InVal = MapValue(PN->getIncomingValue(pred),
                                  VMap, 
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          PN->removeIncomingValue(pred, false);
          --pred;  // Revisit the next entry.
          --e;
        }
      } 
    }
    
    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];
      
      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];
      
      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (const auto &PCI : PredCount) {
          BasicBlock *Pred = PCI.first;
          for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }
    
    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        assert(VMap[&*OldI] == PN && "VMap mismatch");
        VMap[&*OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
  }

  // Make a second pass over the PHINodes now that all of them have been
  // remapped into the new function, simplifying the PHINode and performing any
  // recursive simplifications exposed. This will transparently update the
  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
  // two PHINodes, the iteration over the old PHIs remains valid, and the
  // mapping will just map us to the new node (which may not even be a PHI
  // node).
  const DataLayout &DL = NewFunc->getParent()->getDataLayout();
  SmallSetVector<const Value *, 8> Worklist;
  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
    if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
      Worklist.insert(PHIToResolve[Idx]);

  // Note that we must test the size on each iteration; the worklist can grow.
  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
    const Value *OrigV = Worklist[Idx];
    auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
    if (!I)
      continue;

    // See if this instruction simplifies.
    Value *SimpleV = SimplifyInstruction(I, DL);
    if (!SimpleV)
      continue;

    // Stash away all the uses of the old instruction so we can check them for
    // recursive simplifications after a RAUW. This is cheaper than checking
    // all uses of the replacement value on the recursive step in most cases.
    for (const User *U : OrigV->users())
      Worklist.insert(cast<Instruction>(U));

    // Replace the instruction with its simplified value.
    I->replaceAllUsesWith(SimpleV);

    // If the original instruction had no side effects, remove it.
    if (isInstructionTriviallyDead(I))
      I->eraseFromParent();
    else
      VMap[OrigV] = I;
  }

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
  Function::iterator I = Begin;
  while (I != NewFunc->end()) {
    // Check if this block has become dead during inlining or other
    // simplifications. Note that the first block will appear dead, as it has
    // not yet been wired up properly.
    if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
                       I->getSinglePredecessor() == &*I)) {
      BasicBlock *DeadBB = &*I++;
      DeleteDeadBlock(DeadBB);
      continue;
    }

    // We need to simplify conditional branches and switches with a constant
    // operand. We try to prune these out when cloning, but if the
    // simplification required looking through PHI nodes, those are only
    // available after forming the full basic block. That may leave some here,
    // and we still want to prune the dead code as early as possible.
    ConstantFoldTerminator(&*I);

    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }
    
    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor()) {
      ++I; continue;
    }

    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
    // above should have zapped all of them.
    assert(!isa<PHINode>(Dest->begin()));

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();
    
    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(&*I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());
    
    // Remove the dest block.
    Dest->eraseFromParent();
    
    // Do not increment I, iteratively merge all things this block branches to.
  }

  // Make a final pass over the basic blocks from the old function to gather
  // any return instructions which survived folding. We have to do this here
  // because we can iteratively remove and merge returns above.
  for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
                          E = NewFunc->end();
       I != E; ++I)
    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
      Returns.push_back(RI);
}
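// A minimal sketch of the predecessor-counting trick used above when trimming
// excess PHI entries, as a hypothetical helper: decrement once per actual CFG
// predecessor, then increment once per PHI entry. Matched pairs cancel
// (unsigned wrap-around is well defined), so entries left with a positive
// count are exactly the excess ones to remove.
static std::map<BasicBlock *, unsigned> countExcessPHIEntries(BasicBlock *BB,
                                                              PHINode *PN) {
  std::map<BasicBlock *, unsigned> PredCount;
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
    --PredCount[*PI];
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    ++PredCount[PN->getIncomingBlock(i)];
  return PredCount; // Positive counts mark PHI entries to remove.
}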
示例#24
0
/// performStoreOnlyObjectElimination - Scan the graph of uses of the specified
/// object allocation.  If the object does not escape and is only stored to
/// (this can happen because GVN and other optimizations hoist and forward-
/// substitute all stores to the object, eliminating all loads from it), then
/// zap the object and all accesses related to it.
static bool performStoreOnlyObjectElimination(CallInst &Allocation,
                                              BasicBlock::iterator &BBI) {
  DtorKind DtorInfo = analyzeDestructor(Allocation.getArgOperand(0));

  // We can't delete the object if its destructor has side effects.
  if (DtorInfo != DtorKind::NoSideEffects)
    return false;

  // Do a depth first search exploring all of the uses of the object pointer,
  // following through casts, pointer adjustments etc.  If we find any loads or
  // any escape sites of the object, we give up.  If we succeed in walking the
  // entire graph of uses, we can remove the resultant set.
  SmallSetVector<Instruction*, 16> InvolvedInstructions;
  SmallVector<Instruction*, 16> Worklist;
  Worklist.push_back(&Allocation);

  // Stores - Keep track of all of the store instructions we see.
  SmallVector<StoreInst*, 16> Stores;

  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    // Insert the instruction into our InvolvedInstructions set.  If we have
    // already seen it, then don't reprocess all of the uses.
    if (!InvolvedInstructions.insert(I)) continue;

    // Okay, this is the first time we've seen this instruction, proceed.
    switch (classifyInstruction(*I)) {
    // These instructions should not reach here based on the pass ordering,
    // i.e. LLVMARCOpt -> LLVMContractOpt.
    case RT_RetainN:
    case RT_UnknownRetainN:
    case RT_BridgeRetainN:
    case RT_ReleaseN:
    case RT_UnknownReleaseN:
    case RT_BridgeReleaseN:
      llvm_unreachable("These are only created by LLVMARCContract !");
    case RT_AllocObject:
      // If this is a different swift_allocObject than we started with, then
      // there is some computation feeding into a size or alignment computation
      // that we have to keep... unless we can delete *that* entire object as
      // well.
      break;

    case RT_NoMemoryAccessed:
      // If no memory is accessed, then something is being done with the
      // pointer: maybe it is bitcast or GEP'd. Since there are no side effects,
      // it is perfectly fine to delete this instruction if all uses of the
      // instruction are also eliminable.

      if (I->mayHaveSideEffects() || isa<TerminatorInst>(I))
        return false;
      break;

    case RT_Release:
    case RT_Retain:
    case RT_FixLifetime:
    case RT_CheckUnowned:
      // It is perfectly fine to eliminate various retains and releases of this
      // object: we are zapping all accesses or none.
      break;

    // If this is an unknown instruction, we have more interesting things to
    // consider.
    case RT_Unknown:
    case RT_ObjCRelease:
    case RT_ObjCRetain:
    case RT_UnknownRetain:
    case RT_UnknownRelease:
    case RT_BridgeRetain:
    case RT_BridgeRelease:
    case RT_RetainUnowned:

      // Otherwise, this really is some unhandled instruction.  Bail out.
      return false;
    }

    // Okay, if we got here, the instruction can be eaten so long as all of its
    // uses can be.  Scan through the uses and add them to the worklist for
    // recursive processing.
    for (auto UI = I->user_begin(), E = I->user_end(); UI != E; ++UI) {
      Instruction *User = cast<Instruction>(*UI);

      // Handle stores as a special case here: we want to make sure that the
      // object is being stored *to*, not itself being stored (which would be an
      // escape point).  Since stores themselves don't have any uses, we can
      // short-cut the classification scheme above.
      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // If this is a store *to* the object, we can zap it.
        if (UI.getUse().getOperandNo() == StoreInst::getPointerOperandIndex()) {
          InvolvedInstructions.insert(SI);
          continue;
        }
        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        // If this is a memset/memcpy/memmove *to* the object, we can zap it.
        if (UI.getUse().getOperandNo() == 0) {
          InvolvedInstructions.insert(MI);
          continue;
        }
        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }

      // Otherwise, normal instructions just go on the worklist for processing.
      Worklist.push_back(User);
    }
  }

  // Ok, we succeeded!  This means we can zap all of the instructions that use
  // the object.  One thing we have to be careful of is to make sure that we
  // don't invalidate "BBI" (the iterator the outer walk of the optimization
  // pass is using, which indicates the next instruction to process).  This
  // would happen if we deleted the instruction it points to.  Advance the
  // iterator if that would happen.
  while (InvolvedInstructions.count(&*BBI))
    ++BBI;

  // Zap all of the instructions.
  for (auto I : InvolvedInstructions) {
    if (!I->use_empty())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    I->eraseFromParent();
  }

  ++NumStoreOnlyObjectsEliminated;
  return true;
}
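// A minimal sketch of the escape distinction drawn above for stores, as a
// hypothetical helper: a store keeps the object dead only when the object is
// the *address* operand; storing the object pointer itself as the value is an
// escape.
static bool isStoreToObject(const Use &U) {
  return isa<StoreInst>(U.getUser()) &&
         U.getOperandNo() == StoreInst::getPointerOperandIndex();
}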
示例#25
0
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Only analyze inner loops. We can't properly estimate cost of nested loops
  // and we won't visit inner loops again anyway.
  if (!L->empty())
    return None;

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;

  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // We track the simplification of each instruction in each iteration. We use
  // this to recursively merge costs into the unrolled cost on-demand so that
  // we don't count the cost of any dead code. This is essentially a map from
  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
  DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;

  // A small worklist used to accumulate cost of instructions from each
  // observable and reached root in the loop.
  SmallVector<Instruction *, 16> CostWorklist;

  // PHI-used worklist used between iterations while accumulating cost.
  SmallVector<Instruction *, 4> PHIUsedList;

  // Helper function to accumulate cost for instructions in the loop.
  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
    assert(Iteration >= 0 && "Cannot have a negative iteration!");
    assert(CostWorklist.empty() && "Must start with an empty cost list");
    assert(PHIUsedList.empty() && "Must start with an empty phi used list");
    CostWorklist.push_back(&RootI);
    for (;; --Iteration) {
      do {
        Instruction *I = CostWorklist.pop_back_val();

        // InstCostMap only uses I and Iteration as a key, the other two values
        // don't matter here.
        auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
        if (CostIter == InstCostMap.end())
          // If an input to a PHI node comes from a dead path through the loop
          // we may have no cost data for it here. What that actually means is
          // that it is free.
          continue;
        auto &Cost = *CostIter;
        if (Cost.IsCounted)
          // Already counted this instruction.
          continue;

        // Mark that we are counting the cost of this instruction now.
        Cost.IsCounted = true;

        // If this is a PHI node in the loop header, just add it to the PHI set.
        if (auto *PhiI = dyn_cast<PHINode>(I))
          if (PhiI->getParent() == L->getHeader()) {
            assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
                                  "inherently simplify during unrolling.");
            if (Iteration == 0)
              continue;

            // Push the incoming value from the backedge into the PHI used list
            // if it is an in-loop instruction. We'll use this to populate the
            // cost worklist for the next iteration (as we count backwards).
            if (auto *OpI = dyn_cast<Instruction>(
                    PhiI->getIncomingValueForBlock(L->getLoopLatch())))
              if (L->contains(OpI))
                PHIUsedList.push_back(OpI);
            continue;
          }

        // First accumulate the cost of this instruction.
        if (!Cost.IsFree) {
          UnrolledCost += TTI.getUserCost(I);
          DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
                       << "): ");
          DEBUG(I->dump());
        }

        // We must count the cost of every operand which is not free,
        // recursively. If we reach a loop PHI node, simply add it to the set
        // to be considered on the next iteration (backwards!).
        for (Value *Op : I->operands()) {
          // Check whether this operand is free due to being a constant or
          // outside the loop.
          auto *OpI = dyn_cast<Instruction>(Op);
          if (!OpI || !L->contains(OpI))
            continue;

          // Otherwise accumulate its cost.
          CostWorklist.push_back(OpI);
        }
      } while (!CostWorklist.empty());

      if (PHIUsedList.empty())
        // We've exhausted the search.
        break;

      assert(Iteration > 0 &&
             "Cannot track PHI-used values past the first iteration!");
      CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
      PHIUsedList.clear();
    }
  };

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop, counting instructions
  // that would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size; this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        // Track this instruction's expected baseline cost when executing the
        // rolled loop form.
        RolledDynamicCost += TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns true, mark the instruction as free after
        // unrolling and continue.
        bool IsFree = Analyzer.visit(I);
        bool Inserted = InstCostMap.insert({&I, (int)Iteration,
                                           (unsigned)IsFree,
                                           /*IsCounted*/ false}).second;
        (void)Inserted;
        assert(Inserted && "Cannot have a state for an unvisited instruction!");

        if (IsFree)
          continue;

        // If the instruction might have a side-effect, recursively account for
        // the cost of it and all the instructions leading up to it.
        if (I.mayHaveSideEffects())
          AddCostRecursively(I, Iteration);

        // Can't properly model the cost of a call.
        // FIXME: With a proper cost model we should be able to do it.
        if (isa<CallInst>(&I))
          return None;

        // If unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      BasicBlock *KnownSucc = nullptr;
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              KnownSucc = BI->getSuccessor(0);
            else if (ConstantInt *SimpleCondVal =
                         dyn_cast<ConstantInt>(SimpleCond))
              KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            KnownSucc = SI->getSuccessor(0);
          else if (ConstantInt *SimpleCondVal =
                       dyn_cast<ConstantInt>(SimpleCond))
            KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
        }
      }
      if (KnownSucc) {
        if (L->contains(KnownSucc))
          BBWorklist.insert(KnownSucc);
        else
          ExitWorklist.insert({BB, KnownSucc});
        continue;
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
        else
          ExitWorklist.insert({BB, Succ});
      AddCostRecursively(*TI, Iteration);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }

  while (!ExitWorklist.empty()) {
    BasicBlock *ExitingBB, *ExitBB;
    std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

    for (Instruction &I : *ExitBB) {
      auto *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *Op = PN->getIncomingValueForBlock(ExitingBB);
      if (auto *OpI = dyn_cast<Instruction>(Op))
        if (L->contains(OpI))
          AddCostRecursively(*OpI, TripCount - 1);
    }
  }

  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
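// A hedged sketch of how a caller might act on the result above, assuming
// EstimatedUnrollCost exposes the two int fields named in the doc comment
// (UnrolledCost and RolledDynamicCost). The helper and the MinPercentSaved
// threshold are illustrative, not the unroller's actual heuristic.
static bool isFullUnrollProfitable(const EstimatedUnrollCost &Cost,
                                   int MinPercentSaved) {
  if (Cost.RolledDynamicCost <= 0)
    return false;
  // Percentage of dynamic instructions the unrolled form is expected to save.
  int PercentSaved =
      100 - (100 * Cost.UnrolledCost) / Cost.RolledDynamicCost;
  return PercentSaved >= MinPercentSaved;
}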
示例#26
0
/// \brief Figure out if the loop is worth full unrolling.
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities.  This routine
/// estimates this optimization.  It computes cost of unrolled loop
/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
/// dynamic cost we mean that we won't count costs of blocks that are known not
/// to be executed (i.e. if we have a branch in the loop and we know that at the
/// given iteration its condition would be resolved to true, we won't add up the
/// cost of the 'false'-block).
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      int MaxUnrolledLoopSize) {
  // We want to be able to scale offsets by the trip count and add more offsets
  // to them without checking for overflows, and we already don't want to
  // analyze *massive* trip counts, so we force the max to be reasonably small.
  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
         "The unroll iterations max is too large!");

  // Don't simulate loops with a big or unknown trip count.
  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
      TripCount > UnrollMaxIterationsCountToAnalyze)
    return None;

  SmallSetVector<BasicBlock *, 16> BBWorklist;
  DenseMap<Value *, Constant *> SimplifiedValues;
  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;

  // The estimated cost of the unrolled form of the loop. We try to estimate
  // this by simplifying as much as we can while computing the estimate.
  int UnrolledCost = 0;
  // We also track the estimated dynamic (that is, actually executed) cost in
  // the rolled form. This helps identify cases when the savings from unrolling
  // aren't just exposing dead control flows, but actual reduced dynamic
  // instructions due to the simplifications which we expect to occur after
  // unrolling.
  int RolledDynamicCost = 0;

  // Ensure that we don't violate the loop structure invariants relied on by
  // this analysis.
  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
  assert(L->isLCSSAForm(DT) &&
         "Must have loops in LCSSA form to track live-out values.");

  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

  // Simulate execution of each iteration of the loop, counting instructions
  // that would be simplified.
  // Since the same load will take different values on different iterations,
  // we literally have to go through all of the loop's iterations.
  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
    DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

    // Prepare for the iteration by collecting any simplified entry or backedge
    // inputs.
    for (Instruction &I : *L->getHeader()) {
      auto *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      // The loop header PHI nodes must have exactly two inputs: one from the
      // loop preheader and one from the loop latch.
      assert(
          PHI->getNumIncomingValues() == 2 &&
          "Must have an incoming value only for the preheader and the latch.");

      Value *V = PHI->getIncomingValueForBlock(
          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
      Constant *C = dyn_cast<Constant>(V);
      if (Iteration != 0 && !C)
        C = SimplifiedValues.lookup(V);
      if (C)
        SimplifiedInputValues.push_back({PHI, C});
    }

    // Now clear and re-populate the map for the next iteration.
    SimplifiedValues.clear();
    while (!SimplifiedInputValues.empty())
      SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());

    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);

    BBWorklist.clear();
    BBWorklist.insert(L->getHeader());
    // Note that we *must not* cache the size; this loop grows the worklist.
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      BasicBlock *BB = BBWorklist[Idx];

      // Visit all instructions in the given basic block and try to simplify
      // them.  We don't change the actual IR, just count optimization
      // opportunities.
      for (Instruction &I : *BB) {
        int InstCost = TTI.getUserCost(&I);

        // Visit the instruction to analyze its loop cost after unrolling,
        // and if the visitor returns false, include this instruction in the
        // unrolled cost.
        if (!Analyzer.visit(I))
          UnrolledCost += InstCost;
        else {
          DEBUG(dbgs() << "  " << I
                       << " would be simplified if loop is unrolled.\n");
          (void)0;
        }

        // Also track this instruction's expected cost when executing the
        // rolled loop form.
        RolledDynamicCost += InstCost;

        // If unrolled body turns out to be too big, bail out.
        if (UnrolledCost > MaxUnrolledLoopSize) {
          DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
                       << "  UnrolledCost: " << UnrolledCost
                       << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
                       << "\n");
          return None;
        }
      }

      TerminatorInst *TI = BB->getTerminator();

      // Add in the live successors by first checking whether we have a
      // terminator that may be simplified based on the values simplified by
      // this call.
      if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
        if (BI->isConditional()) {
          if (Constant *SimpleCond =
                  SimplifiedValues.lookup(BI->getCondition())) {
            BasicBlock *Succ = nullptr;
            // Just take the first successor if condition is undef
            if (isa<UndefValue>(SimpleCond))
              Succ = BI->getSuccessor(0);
            else
              Succ = BI->getSuccessor(
                  cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
            if (L->contains(Succ))
              BBWorklist.insert(Succ);
            continue;
          }
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
        if (Constant *SimpleCond =
                SimplifiedValues.lookup(SI->getCondition())) {
          BasicBlock *Succ = nullptr;
          // Just take the first successor if condition is undef
          if (isa<UndefValue>(SimpleCond))
            Succ = SI->getSuccessor(0);
          else
            Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
                       .getCaseSuccessor();
          if (L->contains(Succ))
            BBWorklist.insert(Succ);
          continue;
        }
      }

      // Add BB's successors to the worklist.
      for (BasicBlock *Succ : successors(BB))
        if (L->contains(Succ))
          BBWorklist.insert(Succ);
    }

    // If we found no optimization opportunities on the first iteration, we
    // won't find them on later ones either.
    if (UnrolledCost == RolledDynamicCost) {
      DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                   << "  UnrolledCost: " << UnrolledCost << "\n");
      return None;
    }
  }
  DEBUG(dbgs() << "Analysis finished:\n"
               << "UnrolledCost: " << UnrolledCost << ", "
               << "RolledDynamicCost: " << RolledDynamicCost << "\n");
  return {{UnrolledCost, RolledDynamicCost}};
}
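// A minimal sketch of the per-iteration handoff used in both versions above,
// as a hypothetical helper: PHI inputs simplified while simulating one
// iteration are buffered in SimplifiedInputValues and only seed
// SimplifiedValues for the next iteration, so lookups within an iteration
// never observe half-updated state.
static void advanceIteration(
    DenseMap<Value *, Constant *> &SimplifiedValues,
    SmallVectorImpl<std::pair<Value *, Constant *>> &SimplifiedInputValues) {
  // Drop the previous iteration's facts, then seed from the buffer.
  SimplifiedValues.clear();
  while (!SimplifiedInputValues.empty())
    SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
}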