Example #1
0
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isEHPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc DL;  // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec mask
  // where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jumps to the same MBB is either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = &*I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(&*I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(&*I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
       Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(&*I))
          Indexes->removeMachineInstrFromMaps(&*I);
        Indexes->insertMachineInstrInMaps(&*I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (const auto &LI : Succ->liveins())
    NMBB->addLiveIn(LI);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(&*I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last in
    // the function. If the original block was the last in the function, all
    // live intervals will end prior to the beginning of the new split block. If
    // the original block was not at the end of the function, all live intervals
    // will extend to the end of the new split block.

    bool isLastMBB =
      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
Example #2
0
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amonut of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally maknig a single function grow in a super linear fashion.
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events. We can use the
    // manager we get here for looking up results for functions other than this
    // node however because those functions aren't going to be mutated by this
    // pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
            .getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the sequnece.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
        continue;

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;

      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;

      if (!InlineFunction(CS, IFI)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ORE.emit([&]() {
        bool AlwaysInline = OIC->isAlways();
        StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
        OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
        R << NV("Callee", &Callee) << " inlined into ";
        R << NV("Caller", &F);
        if (AlwaysInline)
          R << " with cost=always";
        else {
          R << " with cost=" << NV("Cost", OIC->getCost());
          R << " (threshold=" << NV("Threshold", OIC->getThreshold());
          R << ")";
        }
        return R;
      });

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
        ImportedFunctionsStats->recordInline(F, Callee);

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot put cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain. While in some cases these edges are direct calls inside the
    // callee, they have to be modeled in the inliner as reference edges as
    // there may be a reference edge anywhere along the chain from the current
    // caller to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call edge
    // below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potentially to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight over
    // approximation of the possible inlining decisions that must be avoided,
    // but is relatively efficient to store.
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, cclear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
  StackProtector *SP = &getAnalysis<StackProtector>();

  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0
         && "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // Skew to be applied to alignment.
  unsigned Skew = TFI.getStackAlignmentSkew(Fn);

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // We currently don't support filling in holes in between fixed sized
  // objects, so we adjust 'Offset' to point to the end of last fixed sized
  // preallocated object.
  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -MFI->getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  if (StackGrowsDown) {
    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
      // If the stack grows down, we need to add the size to find the lowest
      // address of the object.
      Offset += MFI->getObjectSize(i);

      unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = RoundUpToAlignment(Offset, Align, Skew);

      MFI->setObjectOffset(i, -Offset);        // Set the computed offset
    }
  } else {
    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
    for (int i = MaxCSFI; i >= MinCSFI ; --i) {
      unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = RoundUpToAlignment(Offset, Align, Skew);

      MFI->setObjectOffset(i, Offset);
      Offset += MFI->getObjectSize(i);
    }
  }

  unsigned MaxAlign = MFI->getMaxAlignment();

  // Make sure the special register scavenging spill slot is closest to the
  // incoming stack pointer if a frame pointer is required and is closer
  // to the incoming rather than the final stack pointer.
  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
  bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
                               TFI.isFPCloseToIncomingSP() &&
                               RegInfo->useFPForScavengingIndex(Fn) &&
                               !RegInfo->needsStackRealignment(Fn));
  if (RS && EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
           IE = SFIs.end(); I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI->getUseLocalStackAllocationBlock()) {
    unsigned Align = MFI->getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = RoundUpToAlignment(Offset, Align, Skew);

    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
            FIOffset << "]\n");
      MFI->setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block
    Offset += MFI->getLocalFrameSize();

    MaxAlign = std::max(Align, MaxAlign);
  }

  // Make sure that the stack protector comes before the local variables on the
  // stack.
  SmallSet<int, 16> ProtectedObjs;
  if (MFI->getStackProtectorIndex() >= 0) {
    StackObjSet LargeArrayObjs;
    StackObjSet SmallArrayObjs;
    StackObjSet AddrOfObjs;

    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
                      Offset, MaxAlign, Skew);

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
      if (MFI->isObjectPreAllocated(i) &&
          MFI->getUseLocalStackAllocationBlock())
        continue;
      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
        continue;
      if (RS && RS->isScavengingFrameIndex((int)i))
        continue;
      if (MFI->isDeadObjectIndex(i))
        continue;
      if (MFI->getStackProtectorIndex() == (int)i)
        continue;

      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
      case StackProtector::SSPLK_None:
        continue;
      case StackProtector::SSPLK_SmallArray:
        SmallArrayObjs.insert(i);
        continue;
      case StackProtector::SSPLK_AddrOf:
        AddrOfObjs.insert(i);
        continue;
      case StackProtector::SSPLK_LargeArray:
        LargeArrayObjs.insert(i);
        continue;
      }
      llvm_unreachable("Unexpected SSPLayoutKind.");
    }

    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
  }

  // Then assign frame offsets to stack objects that are not used to spill
  // callee saved registers.
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (MFI->isObjectPreAllocated(i) &&
        MFI->getUseLocalStackAllocationBlock())
      continue;
    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
      continue;
    if (RS && RS->isScavengingFrameIndex((int)i))
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    if (MFI->getStackProtectorIndex() == (int)i)
      continue;
    if (ProtectedObjs.count(i))
      continue;

    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && !EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
           IE = SFIs.end(); I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
      Offset += MFI->getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment.  If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();

    // If the frame pointer is eliminated, all frame offsets will be relative to
    // SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI->setStackSize(StackSize);
  NumBytesStackSpace += StackSize;
}
Example #4
0
/// findLoopBackEdges - Do a DFS walk to find loop back edges.
///
void CodeGenPrepare::findLoopBackEdges(const Function &F) {
  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);
  
  BackEdges.insert(Edges.begin(), Edges.end());
}
Example #5
0
/// If there were any appending global variables, link them together now.
Expected<Constant *>
IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
                                const GlobalVariable *SrcGV) {
  Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getValueType()))
                    ->getElementType();

  // FIXME: This upgrade is done during linking to support the C API.  Once the
  // old form is deprecated, we should move this upgrade to
  // llvm::UpgradeGlobalVariable() and simplify the logic here and in
  // Mapper::mapAppendingVariable() in ValueMapper.cpp.
  StringRef Name = SrcGV->getName();
  bool IsNewStructor = false;
  bool IsOldStructor = false;
  if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") {
    if (cast<StructType>(EltTy)->getNumElements() == 3)
      IsNewStructor = true;
    else
      IsOldStructor = true;
  }

  PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo();
  if (IsOldStructor) {
    auto &ST = *cast<StructType>(EltTy);
    Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
    EltTy = StructType::get(SrcGV->getContext(), Tys, false);
  }

  uint64_t DstNumElements = 0;
  if (DstGV) {
    ArrayType *DstTy = cast<ArrayType>(DstGV->getValueType());
    DstNumElements = DstTy->getNumElements();

    if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
      return stringErr(
          "Linking globals named '" + SrcGV->getName() +
          "': can only link appending global with another appending "
          "global!");

    // Check to see that they two arrays agree on type.
    if (EltTy != DstTy->getElementType())
      return stringErr("Appending variables with different element types!");
    if (DstGV->isConstant() != SrcGV->isConstant())
      return stringErr("Appending variables linked with different const'ness!");

    if (DstGV->getAlignment() != SrcGV->getAlignment())
      return stringErr(
          "Appending variables with different alignment need to be linked!");

    if (DstGV->getVisibility() != SrcGV->getVisibility())
      return stringErr(
          "Appending variables with different visibility need to be linked!");

    if (DstGV->hasGlobalUnnamedAddr() != SrcGV->hasGlobalUnnamedAddr())
      return stringErr(
          "Appending variables with different unnamed_addr need to be linked!");

    if (DstGV->getSection() != SrcGV->getSection())
      return stringErr(
          "Appending variables with different section name need to be linked!");
  }

  SmallVector<Constant *, 16> SrcElements;
  getArrayElements(SrcGV->getInitializer(), SrcElements);

  if (IsNewStructor) {
    auto It = remove_if(SrcElements, [this](Constant *E) {
      auto *Key =
          dyn_cast<GlobalValue>(E->getAggregateElement(2)->stripPointerCasts());
      if (!Key)
        return false;
      GlobalValue *DGV = getLinkedToGlobal(Key);
      return !shouldLink(DGV, *Key);
    });
    SrcElements.erase(It, SrcElements.end());
  }
  uint64_t NewSize = DstNumElements + SrcElements.size();
  ArrayType *NewType = ArrayType::get(EltTy, NewSize);

  // Create the new global variable.
  GlobalVariable *NG = new GlobalVariable(
      DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(),
      /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(),
      SrcGV->getType()->getAddressSpace());

  NG->copyAttributesFrom(SrcGV);
  forceRenaming(NG, SrcGV->getName());

  Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));

  Mapper.scheduleMapAppendingVariable(*NG,
                                      DstGV ? DstGV->getInitializer() : nullptr,
                                      IsOldStructor, SrcElements);

  // Replace any uses of the two global variables with uses of the new
  // global.
  if (DstGV) {
    DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
    DstGV->eraseFromParent();
  }

  return Ret;
}
Example #6
0
/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.
void TokenLexer::ExpandFunctionArguments() {
  SmallVector<Token, 128> ResultToks;

  // Loop through 'Tokens', expanding them into ResultToks.  Keep
  // track of whether we change anything.  If not, no need to keep them.  If so,
  // we install the newly expanded sequence as the new 'Tokens' list.
  bool MadeChange = false;

  const bool CalledWithVariadicArguments =
      ActualArgs->invokedWithVariadicArgument(Macro);

  VAOptExpansionContext VCtx(PP);

  for (unsigned I = 0, E = NumTokens; I != E; ++I) {
    const Token &CurTok = Tokens[I];
    // We don't want a space for the next token after a paste
    // operator.  In valid code, the token will get smooshed onto the
    // preceding one anyway. In assembler-with-cpp mode, invalid
    // pastes are allowed through: in this case, we do not want the
    // extra whitespace to be added.  For example, we want ". ## foo"
    // -> ".foo" not ". foo".
    if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
      NextTokGetsSpace = true;

    if (VCtx.isVAOptToken(CurTok)) {
      MadeChange = true;
      assert(Tokens[I + 1].is(tok::l_paren) &&
             "__VA_OPT__ must be followed by '('");

      ++I;             // Skip the l_paren
      VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
                                           ResultToks.size());

      continue;
    }

    // We have entered into the __VA_OPT__ context, so handle tokens
    // appropriately.
    if (VCtx.isInVAOpt()) {
      // If we are about to process a token that is either an argument to
      // __VA_OPT__ or its closing rparen, then:
      //  1) If the token is the closing rparen that exits us out of __VA_OPT__,
      //  perform any necessary stringification or placemarker processing,
      //  and/or skip to the next token.
      //  2) else if macro was invoked without variadic arguments skip this
      //  token.
      //  3) else (macro was invoked with variadic arguments) process the token
      //  normally.

      if (Tokens[I].is(tok::l_paren))
        VCtx.sawOpeningParen(Tokens[I].getLocation());
      // Continue skipping tokens within __VA_OPT__ if the macro was not
      // called with variadic arguments, else let the rest of the loop handle
      // this token. Note sawClosingParen() returns true only if the r_paren matches
      // the closing r_paren of the __VA_OPT__.
      if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
        if (!CalledWithVariadicArguments) {
          // Skip this token.
          continue;
        }
        // ... else the macro was called with variadic arguments, and we do not
        // have a closing rparen - so process this token normally.
      } else {
        // Current token is the closing r_paren which marks the end of the
        // __VA_OPT__ invocation, so handle any place-marker pasting (if
        // empty) by removing hashhash either before (if exists) or after. And
        // also stringify the entire contents if VAOPT was preceded by a hash,
        // but do so only after any token concatenation that needs to occur
        // within the contents of VAOPT.

        if (VCtx.hasStringifyOrCharifyBefore()) {
          // Replace all the tokens just added from within VAOPT into a single
          // stringified token. This requires token-pasting to eagerly occur
          // within these tokens. If either the contents of VAOPT were empty
          // or the macro wasn't called with any variadic arguments, the result
          // is a token that represents an empty string.
          stringifyVAOPTContents(ResultToks, VCtx,
                                 /*ClosingParenLoc*/ Tokens[I].getLocation());

        } else if (/*No tokens within VAOPT*/ !(
            ResultToks.size() - VCtx.getNumberOfTokensPriorToVAOpt())) {
          // Treat VAOPT as a placemarker token.  Eat either the '##' before the
          // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
          // hashhash was not a placemarker) or the '##'
          // after VAOPT, but not both.

          if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
            ResultToks.pop_back();
          } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
            ++I; // Skip the following hashhash.
          }
        }
        VCtx.reset();
        // We processed __VA_OPT__'s closing paren (and the exit out of
        // __VA_OPT__), so skip to the next token.
        continue;
      }
    }

    // If we found the stringify operator, get the argument stringified.  The
    // preprocessor already verified that the following token is a macro
    // parameter or __VA_OPT__ when the #define was lexed.

    if (CurTok.isOneOf(tok::hash, tok::hashat)) {
      int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
      assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
             "Token following # is not an argument or __VA_OPT__!");

      if (ArgNo == -1) {
        // Handle the __VA_OPT__ case.
        VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
                                   CurTok.is(tok::hashat));
        continue;
      }
      // Else handle the simple argument case.
      SourceLocation ExpansionLocStart =
          getExpansionLocForMacroDefLoc(CurTok.getLocation());
      SourceLocation ExpansionLocEnd =
          getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());

      Token Res;
      if (CurTok.is(tok::hash))  // Stringify
        Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
                                                 ExpansionLocStart,
                                                 ExpansionLocEnd);
      else {
        // 'charify': don't bother caching these.
        Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
                                           PP, true,
                                           ExpansionLocStart,
                                           ExpansionLocEnd);
      }
      Res.setFlag(Token::StringifiedInMacro);

      // The stringified/charified string leading space flag gets set to match
      // the #/#@ operator.
      if (NextTokGetsSpace)
        Res.setFlag(Token::LeadingSpace);

      ResultToks.push_back(Res);
      MadeChange = true;
      ++I;  // Skip arg name.
      NextTokGetsSpace = false;
      continue;
    }

    // Find out if there is a paste (##) operator before or after the token.
    bool NonEmptyPasteBefore =
      !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
    bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
    bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);

    assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
           "unexpected ## in ResultToks");

    // Otherwise, if this is not an argument token, just add the token to the
    // output buffer.
    IdentifierInfo *II = CurTok.getIdentifierInfo();
    int ArgNo = II ? Macro->getParameterNum(II) : -1;
    if (ArgNo == -1) {
      // This isn't an argument, just add it.
      ResultToks.push_back(CurTok);

      if (NextTokGetsSpace) {
        ResultToks.back().setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      } else if (PasteBefore && !NonEmptyPasteBefore)
        ResultToks.back().clearFlag(Token::LeadingSpace);

      continue;
    }

    // An argument is expanded somehow, the result is different than the
    // input.
    MadeChange = true;

    // Otherwise, this is a use of the argument.

    // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
    // are no trailing commas if __VA_ARGS__ is empty.
    if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
        MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                     /*HasPasteOperator=*/false,
                                     Macro, ArgNo, PP))
      continue;

    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
    // argument and substitute the expanded tokens into the result.  This is
    // C99 6.10.3.1p1.
    if (!PasteBefore && !PasteAfter) {
      const Token *ResultArgToks;

      // Only preexpand the argument if it could possibly need it.  This
      // avoids some work in common cases.
      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
      else
        ResultArgToks = ArgTok;  // Use non-preexpanded tokens.

      // If the arg token expanded into anything, append it.
      if (ResultArgToks->isNot(tok::eof)) {
        size_t FirstResult = ResultToks.size();
        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);

        // In Microsoft-compatibility mode, we follow MSVC's preprocessing
        // behavior by not considering single commas from nested macro
        // expansions as argument separators. Set a flag on the token so we can
        // test for this later when the macro expansion is processed.
        if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
            ResultToks.back().is(tok::comma))
          ResultToks.back().setFlag(Token::IgnoredComma);

        // If the '##' came from expanding an argument, turn it into 'unknown'
        // to avoid pasting.
        for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
                                           ResultToks.end())) {
          if (Tok.is(tok::hashhash))
            Tok.setKind(tok::unknown);
        }

        if(ExpandLocStart.isValid()) {
          updateLocForMacroArgTokens(CurTok.getLocation(),
                                     ResultToks.begin()+FirstResult,
                                     ResultToks.end());
        }

        // If any tokens were substituted from the argument, the whitespace
        // before the first token should match the whitespace of the arg
        // identifier.
        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
                                             NextTokGetsSpace);
        ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
        NextTokGetsSpace = false;
      }
      continue;
    }

    // Okay, we have a token that is either the LHS or RHS of a paste (##)
    // argument.  It gets substituted as its non-pre-expanded tokens.
    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
    if (NumToks) {  // Not an empty argument?
      bool VaArgsPseudoPaste = false;
      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
      // the expander tries to paste ',' with the first token of the __VA_ARGS__
      // expansion.
      if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
          ResultToks[ResultToks.size()-2].is(tok::comma) &&
          (unsigned)ArgNo == Macro->getNumParams()-1 &&
          Macro->isVariadic()) {
        VaArgsPseudoPaste = true;
        // Remove the paste operator, report use of the extension.
        PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
      }

      ResultToks.append(ArgToks, ArgToks+NumToks);

      // If the '##' came from expanding an argument, turn it into 'unknown'
      // to avoid pasting.
      for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
                                         ResultToks.end())) {
        if (Tok.is(tok::hashhash))
          Tok.setKind(tok::unknown);
      }

      if (ExpandLocStart.isValid()) {
        updateLocForMacroArgTokens(CurTok.getLocation(),
                                   ResultToks.end()-NumToks, ResultToks.end());
      }

      // Transfer the leading whitespace information from the token
      // (the macro argument) onto the first token of the
      // expansion. Note that we don't do this for the GNU
      // pseudo-paste extension ", ## __VA_ARGS__".
      if (!VaArgsPseudoPaste) {
        ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
                                                             false);
        ResultToks[ResultToks.size() - NumToks].setFlagValue(
            Token::LeadingSpace, NextTokGetsSpace);
      }

      NextTokGetsSpace = false;
      continue;
    }

    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur.  We
    // implement this by eating ## operators when a LHS or RHS expands to
    // empty.
    if (PasteAfter) {
      // Discard the argument token and skip (don't copy to the expansion
      // buffer) the paste operator after it.
      ++I;
      continue;
    }

    // If this is on the RHS of a paste operator, we've already copied the
    // paste operator to the ResultToks list, unless the LHS was empty too.
    // Remove it.
    assert(PasteBefore);
    if (NonEmptyPasteBefore) {
      assert(ResultToks.back().is(tok::hashhash));
      // Do not remove the paste operator if it is the one before __VA_OPT__
      // (and we are still processing tokens within VA_OPT).  We handle the case
      // of removing the paste operator if __VA_OPT__ reduces to the notional
      // placemarker above when we encounter the closing paren of VA_OPT.
      if (!VCtx.isInVAOpt() ||
          ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
        ResultToks.pop_back();
    }

    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
    // and if the macro had at least one real argument, and if the token before
    // the ## was a comma, remove the comma.  This is a GCC extension which is
    // disabled when using -std=c99.
    if (ActualArgs->isVarargsElidedUse())
      MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                   /*HasPasteOperator=*/true,
                                   Macro, ArgNo, PP);
  }

  // If anything changed, install this as the new Tokens list.
  if (MadeChange) {
    assert(!OwnsTokens && "This would leak if we already own the token list");
    // This is deleted in the dtor.
    NumTokens = ResultToks.size();
    // The tokens will be added to Preprocessor's cache and will be removed
    // when this TokenLexer finishes lexing them.
    Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);

    // The preprocessor cache of macro expanded tokens owns these tokens,not us.
    OwnsTokens = false;
  }
}
Example #7
0
/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
/// information, and remove any dead successors it has.
///
void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
                                     std::vector<Instruction*> &Worklist,
                                     Loop *L) {
  if (pred_begin(BB) != pred_end(BB)) {
    // This block isn't dead, since an edge to BB was just removed, see if there
    // are any easy simplifications we can do now.
    if (BasicBlock *Pred = BB->getSinglePredecessor()) {
      // If it has one pred, fold phi nodes in BB.
      while (isa<PHINode>(BB->begin()))
        ReplaceUsesOfWith(BB->begin(), 
                          cast<PHINode>(BB->begin())->getIncomingValue(0), 
                          Worklist, L, LPM);
      
      // If this is the header of a loop and the only pred is the latch, we now
      // have an unreachable loop.
      if (Loop *L = LI->getLoopFor(BB))
        if (loopHeader == BB && L->contains(Pred)) {
          // Remove the branch from the latch to the header block, this makes
          // the header dead, which will make the latch dead (because the header
          // dominates the latch).
          LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
          Pred->getTerminator()->eraseFromParent();
          new UnreachableInst(BB->getContext(), Pred);
          
          // The loop is now broken, remove it from LI.
          RemoveLoopFromHierarchy(L);
          
          // Reprocess the header, which now IS dead.
          RemoveBlockIfDead(BB, Worklist, L);
          return;
        }
      
      // If pred ends in a uncond branch, add uncond branch to worklist so that
      // the two blocks will get merged.
      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
        if (BI->isUnconditional())
          Worklist.push_back(BI);
    }
    return;
  }

  DEBUG(dbgs() << "Nuking dead block: " << *BB);
  
  // Remove the instructions in the basic block from the worklist.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    RemoveFromWorklist(I, Worklist);
    
    // Anything that uses the instructions in this basic block should have their
    // uses replaced with undefs.
    // If I is not void type then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!I->getType()->isVoidTy())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
  }
  
  // If this is the edge to the header block for a loop, remove the loop and
  // promote all subloops.
  if (Loop *BBLoop = LI->getLoopFor(BB)) {
    if (BBLoop->getLoopLatch() == BB) {
      RemoveLoopFromHierarchy(BBLoop);
      if (currentLoop == BBLoop) {
        currentLoop = 0;
        redoLoop = false;
      }
    }
  }

  // Remove the block from the loop info, which removes it from any loops it
  // was in.
  LI->removeBlock(BB);
  
  
  // Remove phi node entries in successors for this block.
  TerminatorInst *TI = BB->getTerminator();
  SmallVector<BasicBlock*, 4> Succs;
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
    Succs.push_back(TI->getSuccessor(i));
    TI->getSuccessor(i)->removePredecessor(BB);
  }
  
  // Unique the successors, remove anything with multiple uses.
  array_pod_sort(Succs.begin(), Succs.end());
  Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
  
  // Remove the basic block, including all of the instructions contained in it.
  LPM->deleteSimpleAnalysisValue(BB, L);  
  BB->eraseFromParent();
  // Remove successor blocks here that are not dead, so that we know we only
  // have dead blocks in this list.  Nondead blocks have a way of becoming dead,
  // then getting removed before we revisit them, which is badness.
  //
  for (unsigned i = 0; i != Succs.size(); ++i)
    if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
      // One exception is loop headers.  If this block was the preheader for a
      // loop, then we DO want to visit the loop so the loop gets deleted.
      // We know that if the successor is a loop header, that this loop had to
      // be the preheader: the case where this was the latch block was handled
      // above and headers can only have two predecessors.
      if (!LI->isLoopHeader(Succs[i])) {
        Succs.erase(Succs.begin()+i);
        --i;
      }
    }
  
  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
    RemoveBlockIfDead(Succs[i], Worklist, L);
}
Example #8
0
void SelectionDAGBuilder::LowerStatepoint(
    ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
  // The basic scheme here is that information about both the original call and
  // the safepoint is encoded in the CallInst.  We create a temporary call and
  // lower it, then reverse engineer the calling sequence.

  NumOfStatepoints++;
  // Clear state
  StatepointLowering.startNewStatepoint(*this);

  ImmutableCallSite CS(ISP.getCallSite());

#ifndef NDEBUG
  // Consistency check
  for (const User *U : CS->users()) {
    const CallInst *Call = cast<CallInst>(U);
    if (isGCRelocate(Call))
      StatepointLowering.scheduleRelocCall(*Call);
  }
#endif

#ifndef NDEBUG
  // If this is a malformed statepoint, report it early to simplify debugging.
  // This should catch any IR level mistake that's made when constructing or
  // transforming statepoints.
  ISP.verify();

  // Check that the associated GCStrategy expects to encounter statepoints.
  assert(GFI->getStrategy().useStatepoints() &&
         "GCStrategy does not expect to encounter statepoints");
#endif

  // Lower statepoint vmstate and gcstate arguments
  SmallVector<SDValue, 10> LoweredMetaArgs;
  lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this);

  // Get call node, we will replace it later with statepoint
  SDNode *CallNode =
      lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);

  // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
  // nodes with all the appropriate arguments and return values.

  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
  SDValue Chain = CallNode->getOperand(0);

  SDValue Glue;
  bool CallHasIncomingGlue = CallNode->getGluedNode();
  if (CallHasIncomingGlue) {
    // Glue is always last operand
    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
  }

  // Build the GC_TRANSITION_START node if necessary.
  //
  // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
  // order in which they appear in the call to the statepoint intrinsic. If
  // any of the operands is a pointer-typed, that operand is immediately
  // followed by a SRCVALUE for the pointer that may be used during lowering
  // (e.g. to form MachinePointerInfo values for loads/stores).
  const bool IsGCTransition =
      (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) ==
          (uint64_t)StatepointFlags::GCTransition;
  if (IsGCTransition) {
    SmallVector<SDValue, 8> TSOps;

    // Add chain
    TSOps.push_back(Chain);

    // Add GC transition arguments
    for (const Value *V : ISP.gc_transition_args()) {
      TSOps.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        TSOps.push_back(DAG.getSrcValue(V));
    }

    // Add glue if necessary
    if (CallHasIncomingGlue)
      TSOps.push_back(Glue);

    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

    SDValue GCTransitionStart =
        DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps);

    Chain = GCTransitionStart.getValue(0);
    Glue = GCTransitionStart.getValue(1);
  }

  // TODO: Currently, all of these operands are being marked as read/write in
  // PrologEpilougeInserter.cpp, we should special case the VMState arguments
  // and flags to be read-only.
  SmallVector<SDValue, 40> Ops;

  // Add the <id> and <numBytes> constants.
  Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64));
  Ops.push_back(
      DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32));

  // Calculate and push starting position of vmstate arguments
  // Get number of arguments incoming directly into call node
  unsigned NumCallRegArgs =
      CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));

  // Add call target
  SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
  Ops.push_back(CallTarget);

  // Add call arguments
  // Get position of register mask in the call
  SDNode::op_iterator RegMaskIt;
  if (CallHasIncomingGlue)
    RegMaskIt = CallNode->op_end() - 2;
  else
    RegMaskIt = CallNode->op_end() - 1;
  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);

  // Add a constant argument for the calling convention
  pushStackMapConstant(Ops, *this, CS.getCallingConv());

  // Add a constant argument for the flags
  uint64_t Flags = ISP.getFlags();
  assert(
      ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0)
          && "unknown flag used");
  pushStackMapConstant(Ops, *this, Flags);

  // Insert all vmstate and gcstate arguments
  Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());

  // Add register mask from call node
  Ops.push_back(*RegMaskIt);

  // Add chain
  Ops.push_back(Chain);

  // Same for the glue, but we add it only if original call had it
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Compute return values.  Provide a glue output since we consume one as
  // input.  This allows someone else to chain off us as needed.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  SDNode *StatepointMCNode =
      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);

  SDNode *SinkNode = StatepointMCNode;

  // Build the GC_TRANSITION_END node if necessary.
  //
  // See the comment above regarding GC_TRANSITION_START for the layout of
  // the operands to the GC_TRANSITION_END node.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> TEOps;

    // Add chain
    TEOps.push_back(SDValue(StatepointMCNode, 0));

    // Add GC transition arguments
    for (const Value *V : ISP.gc_transition_args()) {
      TEOps.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        TEOps.push_back(DAG.getSrcValue(V));
    }

    // Add glue
    TEOps.push_back(SDValue(StatepointMCNode, 1));

    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

    SDValue GCTransitionStart =
        DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps);

    SinkNode = GCTransitionStart.getNode();
  }

  // Replace original call
  DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
  // Remove originall call node
  DAG.DeleteNode(CallNode);

  // DON'T set the root - under the assumption that it's already set past the
  // inserted node we created.

  // TODO: A better future implementation would be to emit a single variable
  // argument, variable return value STATEPOINT node here and then hookup the
  // return value of each gc.relocate to the respective output of the
  // previously emitted STATEPOINT value.  Unfortunately, this doesn't appear
  // to actually be possible today.
}
Example #9
0
CXCursor cxcursor::MakeCXCursor(Stmt *S, Decl *Parent, CXTranslationUnit TU,
                                SourceRange RegionOfInterest) {
  assert(S && TU && "Invalid arguments!");
  CXCursorKind K = CXCursor_NotImplemented;
  
  switch (S->getStmtClass()) {
  case Stmt::NoStmtClass:
    break;
  
  case Stmt::CaseStmtClass:
    K = CXCursor_CaseStmt;
    break;
  
  case Stmt::DefaultStmtClass:
    K = CXCursor_DefaultStmt;
    break;
  
  case Stmt::IfStmtClass:
    K = CXCursor_IfStmt;
    break;
  
  case Stmt::SwitchStmtClass:
    K = CXCursor_SwitchStmt;
    break;
  
  case Stmt::WhileStmtClass:
    K = CXCursor_WhileStmt;
    break;
  
  case Stmt::DoStmtClass:
    K = CXCursor_DoStmt;
    break;
  
  case Stmt::ForStmtClass:
    K = CXCursor_ForStmt;
    break;
  
  case Stmt::GotoStmtClass:
    K = CXCursor_GotoStmt;
    break;
  
  case Stmt::IndirectGotoStmtClass:
    K = CXCursor_IndirectGotoStmt;
    break;
  
  case Stmt::ContinueStmtClass:
    K = CXCursor_ContinueStmt;
    break;
  
  case Stmt::BreakStmtClass:
    K = CXCursor_BreakStmt;
    break;
  
  case Stmt::ReturnStmtClass:
    K = CXCursor_ReturnStmt;
    break;
  
  case Stmt::AsmStmtClass:
    K = CXCursor_AsmStmt;
    break;
  
  case Stmt::ObjCAtTryStmtClass:
    K = CXCursor_ObjCAtTryStmt;
    break;
  
  case Stmt::ObjCAtCatchStmtClass:
    K = CXCursor_ObjCAtCatchStmt;
    break;
  
  case Stmt::ObjCAtFinallyStmtClass:
    K = CXCursor_ObjCAtFinallyStmt;
    break;
  
  case Stmt::ObjCAtThrowStmtClass:
    K = CXCursor_ObjCAtThrowStmt;
    break;
  
  case Stmt::ObjCAtSynchronizedStmtClass:
    K = CXCursor_ObjCAtSynchronizedStmt;
    break;
  
  case Stmt::ObjCAutoreleasePoolStmtClass:
    K = CXCursor_ObjCAutoreleasePoolStmt;
    break;
  
  case Stmt::ObjCForCollectionStmtClass:
    K = CXCursor_ObjCForCollectionStmt;
    break;
  
  case Stmt::CXXCatchStmtClass:
    K = CXCursor_CXXCatchStmt;
    break;
  
  case Stmt::CXXTryStmtClass:
    K = CXCursor_CXXTryStmt;
    break;
  
  case Stmt::CXXForRangeStmtClass:
    K = CXCursor_CXXForRangeStmt;
    break;
  
  case Stmt::SEHTryStmtClass:
    K = CXCursor_SEHTryStmt;
    break;
  
  case Stmt::SEHExceptStmtClass:
    K = CXCursor_SEHExceptStmt;
    break;
  
  case Stmt::SEHFinallyStmtClass:
    K = CXCursor_SEHFinallyStmt;
    break;
  
  case Stmt::ArrayTypeTraitExprClass:
  case Stmt::AsTypeExprClass:
  case Stmt::AtomicExprClass:
  case Stmt::BinaryConditionalOperatorClass:
  case Stmt::BinaryTypeTraitExprClass:
  case Stmt::CXXBindTemporaryExprClass:
  case Stmt::CXXDefaultArgExprClass:
  case Stmt::CXXScalarValueInitExprClass:
  case Stmt::CXXUuidofExprClass:
  case Stmt::ChooseExprClass:
  case Stmt::DesignatedInitExprClass:
  case Stmt::ExprWithCleanupsClass:
  case Stmt::ExpressionTraitExprClass:
  case Stmt::ExtVectorElementExprClass:
  case Stmt::ImplicitCastExprClass:
  case Stmt::ImplicitValueInitExprClass:
  case Stmt::MaterializeTemporaryExprClass:
  case Stmt::ObjCIndirectCopyRestoreExprClass:
  case Stmt::OffsetOfExprClass:
  case Stmt::ParenListExprClass:
  case Stmt::PredefinedExprClass:
  case Stmt::ShuffleVectorExprClass:
  case Stmt::UnaryExprOrTypeTraitExprClass:
  case Stmt::UnaryTypeTraitExprClass:
  case Stmt::VAArgExprClass:
    K = CXCursor_UnexposedExpr;
    break;

  case Stmt::OpaqueValueExprClass:
    if (Expr *Src = cast<OpaqueValueExpr>(S)->getSourceExpr())
      return MakeCXCursor(Src, Parent, TU, RegionOfInterest);
    K = CXCursor_UnexposedExpr;
    break;

  case Stmt::PseudoObjectExprClass:
    return MakeCXCursor(cast<PseudoObjectExpr>(S)->getSyntacticForm(),
                        Parent, TU, RegionOfInterest);

  case Stmt::CompoundStmtClass:
    K = CXCursor_CompoundStmt;
    break;
      
  case Stmt::NullStmtClass:
    K = CXCursor_NullStmt;
    break;
      
  case Stmt::LabelStmtClass:
    K = CXCursor_LabelStmt;
    break;
  
  case Stmt::DeclStmtClass:
    K = CXCursor_DeclStmt;
    break;
 
  case Stmt::IntegerLiteralClass:
    K = CXCursor_IntegerLiteral;
    break;

  case Stmt::FloatingLiteralClass:
    K = CXCursor_FloatingLiteral;
    break;

  case Stmt::ImaginaryLiteralClass:
    K = CXCursor_ImaginaryLiteral;
    break;

  case Stmt::StringLiteralClass:
    K = CXCursor_StringLiteral;
    break;

  case Stmt::CharacterLiteralClass:
    K = CXCursor_CharacterLiteral;
    break;

  case Stmt::ParenExprClass:
    K = CXCursor_ParenExpr;
    break;

  case Stmt::UnaryOperatorClass:
    K = CXCursor_UnaryOperator;
    break;
  
  case Stmt::CXXNoexceptExprClass:
    K = CXCursor_UnaryExpr;
    break;

  case Stmt::ArraySubscriptExprClass:
    K = CXCursor_ArraySubscriptExpr;
    break;

  case Stmt::BinaryOperatorClass:
    K = CXCursor_BinaryOperator;
    break;

  case Stmt::CompoundAssignOperatorClass:
    K = CXCursor_CompoundAssignOperator;
    break;

  case Stmt::ConditionalOperatorClass:
    K = CXCursor_ConditionalOperator;
    break;

  case Stmt::CStyleCastExprClass:
    K = CXCursor_CStyleCastExpr;
    break;

  case Stmt::CompoundLiteralExprClass:
    K = CXCursor_CompoundLiteralExpr;
    break;

  case Stmt::InitListExprClass:
    K = CXCursor_InitListExpr;
    break;

  case Stmt::AddrLabelExprClass:
    K = CXCursor_AddrLabelExpr;
    break;

  case Stmt::StmtExprClass:
    K = CXCursor_StmtExpr;
    break;

  case Stmt::GenericSelectionExprClass:
    K = CXCursor_GenericSelectionExpr;
    break;

  case Stmt::GNUNullExprClass:
    K = CXCursor_GNUNullExpr;
    break;

  case Stmt::CXXStaticCastExprClass:
    K = CXCursor_CXXStaticCastExpr;
    break;

  case Stmt::CXXDynamicCastExprClass:
    K = CXCursor_CXXDynamicCastExpr;
    break;

  case Stmt::CXXReinterpretCastExprClass:
    K = CXCursor_CXXReinterpretCastExpr;
    break;

  case Stmt::CXXConstCastExprClass:
    K = CXCursor_CXXConstCastExpr;
    break;

  case Stmt::CXXFunctionalCastExprClass:
    K = CXCursor_CXXFunctionalCastExpr;
    break;

  case Stmt::CXXTypeidExprClass:
    K = CXCursor_CXXTypeidExpr;
    break;

  case Stmt::CXXBoolLiteralExprClass:
    K = CXCursor_CXXBoolLiteralExpr;
    break;

  case Stmt::CXXNullPtrLiteralExprClass:
    K = CXCursor_CXXNullPtrLiteralExpr;
    break;

  case Stmt::CXXThisExprClass:
    K = CXCursor_CXXThisExpr;
    break;

  case Stmt::CXXThrowExprClass:
    K = CXCursor_CXXThrowExpr;
    break;

  case Stmt::CXXNewExprClass:
    K = CXCursor_CXXNewExpr;
    break;

  case Stmt::CXXDeleteExprClass:
    K = CXCursor_CXXDeleteExpr;
    break;

  case Stmt::ObjCStringLiteralClass:
    K = CXCursor_ObjCStringLiteral;
    break;

  case Stmt::ObjCEncodeExprClass:
    K = CXCursor_ObjCEncodeExpr;
    break;

  case Stmt::ObjCSelectorExprClass:
    K = CXCursor_ObjCSelectorExpr;
    break;

  case Stmt::ObjCProtocolExprClass:
    K = CXCursor_ObjCProtocolExpr;
    break;

  case Stmt::ObjCBridgedCastExprClass:
    K = CXCursor_ObjCBridgedCastExpr;
    break;

  case Stmt::BlockExprClass:
    K = CXCursor_BlockExpr;
    break;

  case Stmt::PackExpansionExprClass:
    K = CXCursor_PackExpansionExpr;
    break;

  case Stmt::SizeOfPackExprClass:
    K = CXCursor_SizeOfPackExpr;
    break;

  case Stmt::BlockDeclRefExprClass:
  case Stmt::DeclRefExprClass:           
  case Stmt::DependentScopeDeclRefExprClass:
  case Stmt::SubstNonTypeTemplateParmExprClass:
  case Stmt::SubstNonTypeTemplateParmPackExprClass:
  case Stmt::UnresolvedLookupExprClass:
    K = CXCursor_DeclRefExpr;
    break;
      
  case Stmt::CXXDependentScopeMemberExprClass:
  case Stmt::CXXPseudoDestructorExprClass:
  case Stmt::MemberExprClass:            
  case Stmt::ObjCIsaExprClass:
  case Stmt::ObjCIvarRefExprClass:    
  case Stmt::ObjCPropertyRefExprClass: 
  case Stmt::UnresolvedMemberExprClass:
    K = CXCursor_MemberRefExpr;
    break;
      
  case Stmt::CallExprClass:              
  case Stmt::CXXOperatorCallExprClass:
  case Stmt::CXXMemberCallExprClass:
  case Stmt::CUDAKernelCallExprClass:
  case Stmt::CXXConstructExprClass:  
  case Stmt::CXXTemporaryObjectExprClass:
  case Stmt::CXXUnresolvedConstructExprClass:
    K = CXCursor_CallExpr;
    break;
      
  case Stmt::ObjCMessageExprClass: {
    K = CXCursor_ObjCMessageExpr;
    int SelectorIdIndex = -1;
    // Check if cursor points to a selector id.
    if (RegionOfInterest.isValid() &&
        RegionOfInterest.getBegin() == RegionOfInterest.getEnd()) {
      SmallVector<SourceLocation, 16> SelLocs;
      cast<ObjCMessageExpr>(S)->getSelectorLocs(SelLocs);
      SmallVector<SourceLocation, 16>::iterator
        I=std::find(SelLocs.begin(), SelLocs.end(),RegionOfInterest.getBegin());
      if (I != SelLocs.end())
        SelectorIdIndex = I - SelLocs.begin();
    }
    CXCursor C = { K, 0, { Parent, S, TU } };
    return getSelectorIdentifierCursor(SelectorIdIndex, C);
  }
      
  case Stmt::MSDependentExistsStmtClass:
    K = CXCursor_UnexposedStmt;
    break;
  }
  
  CXCursor C = { K, 0, { Parent, S, TU } };
  return C;
}
Example #10
0
// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
// single block and then recurses into the successors.
// The algorithm preserves the flow condition, meaning that the sum of the
// weight of the incoming edges must be equal the block weight which must in
// turn be equal to the sume of the weights of the outgoing edges.
// Since the flow of an block is deterimined from the current state of the
// flow, once an edge has a flow assigned this flow is never changed again,
// otherwise it would be possible to violate the flow condition in another
// block.
void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {

  // Break the recursion if this BasicBlock was already visited.
  if (BBToVisit.find(BB) == BBToVisit.end()) return;

  // Read the LoopInfo for this block.
  bool  BBisHeader = LI->isLoopHeader(BB);
  Loop* BBLoop     = LI->getLoopFor(BB);

  // To get the block weight, read all incoming edges.
  double BBWeight = 0;
  std::set<BasicBlock*> ProcessedPreds;
  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
        bbi != bbe; ++bbi ) {
    // If this block was not considered already, add weight.
    Edge edge = getEdge(*bbi,BB);
    double w = getEdgeWeight(edge);
    if (ProcessedPreds.insert(*bbi).second) {
      BBWeight += ignoreMissing(w);
    }
    // If this block is a loop header and the predecessor is contained in this
    // loop, thus the edge is a backedge, continue and do not check if the
    // value is valid.
    if (BBisHeader && BBLoop->contains(*bbi)) {
      printEdgeError(edge, "but is backedge, continuing");
      continue;
    }
    // If the edges value is missing (and this is no loop header, and this is
    // no backedge) return, this block is currently non estimatable.
    if (w == MissingValue) {
      printEdgeError(edge, "returning");
      return;
    }
  }
  if (getExecutionCount(BB) != MissingValue) {
    BBWeight = getExecutionCount(BB);
  }

  // Fetch all necessary information for current block.
  SmallVector<Edge, 8> ExitEdges;
  SmallVector<Edge, 8> Edges;
  if (BBLoop) {
    BBLoop->getExitEdges(ExitEdges);
  }

  // If this is a loop header, consider the following:
  // Exactly the flow that is entering this block, must exit this block too. So
  // do the following: 
  // *) get all the exit edges, read the flow that is already leaving this
  // loop, remember the edges that do not have any flow on them right now.
  // (The edges that have already flow on them are most likely exiting edges of
  // other loops, do not touch those flows because the previously caclulated
  // loopheaders would not be exact anymore.)
  // *) In case there is not a single exiting edge left, create one at the loop
  // latch to prevent the flow from building up in the loop.
  // *) Take the flow that is not leaving the loop already and distribute it on
  // the remaining exiting edges.
  // (This ensures that all flow that enters the loop also leaves it.)
  // *) Increase the flow into the loop by increasing the weight of this block.
  // There is at least one incoming backedge that will bring us this flow later
  // on. (So that the flow condition in this node is valid again.)
  if (BBisHeader) {
    double incoming = BBWeight;
    // Subtract the flow leaving the loop.
    std::set<Edge> ProcessedExits;
    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
         ee = ExitEdges.end(); ei != ee; ++ei) {
      if (ProcessedExits.insert(*ei).second) {
        double w = getEdgeWeight(*ei);
        if (w == MissingValue) {
          Edges.push_back(*ei);
          // Check if there is a necessary minimal weight, if yes, subtract it 
          // from weight.
          if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
            incoming -= MinimalWeight[*ei];
            DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
          }
        } else {
          incoming -= w;
        }
      }
    }
    // If no exit edges, create one:
    if (Edges.size() == 0) {
      BasicBlock *Latch = BBLoop->getLoopLatch();
      if (Latch) {
        Edge edge = getEdge(Latch,0);
        EdgeInformation[BB->getParent()][edge] = BBWeight;
        printEdgeWeight(edge);
        edge = getEdge(Latch, BB);
        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
        printEdgeWeight(edge);
      }
    }

    // Distribute remaining weight to the exting edges. To prevent fractions
    // from building up and provoking precision problems the weight which is to
    // be distributed is split and the rounded, the last edge gets a somewhat
    // bigger value, but we are close enough for an estimation.
    double fraction = floor(incoming/Edges.size());
    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
         ei != ee; ++ei) {
      double w = 0;
      if (ei != (ee-1)) {
        w = fraction;
        incoming -= fraction;
      } else {
        w = incoming;
      }
      EdgeInformation[BB->getParent()][*ei] += w;
      // Read necessary minimal weight.
      if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
        EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
        DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
      }
      printEdgeWeight(*ei);
      
      // Add minimal weight to paths to all exit edges, this is used to ensure
      // that enough flow is reaching this edges.
      Path p;
      const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
      while (Dest != BB) {
        const BasicBlock *Parent = p.find(Dest)->second;
        Edge e = getEdge(Parent, Dest);
        if (MinimalWeight.find(e) == MinimalWeight.end()) {
          MinimalWeight[e] = 0;
        }
        MinimalWeight[e] += w;
        DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
        Dest = Parent;
      }
    }
    // Increase flow into the loop.
    BBWeight *= (ExecCount+1);
  }

  BlockInformation[BB->getParent()][BB] = BBWeight;
  // Up until now we considered only the loop exiting edges, now we have a
  // definite block weight and must distribute this onto the outgoing edges.
  // Since there may be already flow attached to some of the edges, read this
  // flow first and remember the edges that have still now flow attached.
  Edges.clear();
  std::set<BasicBlock*> ProcessedSuccs;

  succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
  // Also check for (BB,0) edges that may already contain some flow. (But only
  // in case there are no successors.)
  if (bbi == bbe) {
    Edge edge = getEdge(BB,0);
    EdgeInformation[BB->getParent()][edge] = BBWeight;
    printEdgeWeight(edge);
  }
  for ( ; bbi != bbe; ++bbi ) {
    if (ProcessedSuccs.insert(*bbi).second) {
      Edge edge = getEdge(BB,*bbi);
      double w = getEdgeWeight(edge);
      if (w != MissingValue) {
        BBWeight -= getEdgeWeight(edge);
      } else {
        Edges.push_back(edge);
        // If minimal weight is necessary, reserve weight by subtracting weight
        // from block weight, this is readded later on.
        if (MinimalWeight.find(edge) != MinimalWeight.end()) {
          BBWeight -= MinimalWeight[edge];
          DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
        }
      }
    }
  }

  double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
  // Finally we know what flow is still not leaving the block, distribute this
  // flow onto the empty edges.
  for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
       ei != ee; ++ei) {
    if (ei != (ee-1)) {
      EdgeInformation[BB->getParent()][*ei] += fraction;
      BBWeight -= fraction;
    } else {
      EdgeInformation[BB->getParent()][*ei] += BBWeight;
    }
    // Readd minial necessary weight.
    if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
      EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
      DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
    }
    printEdgeWeight(*ei);
  }

  // This block is visited, mark this before the recursion.
  BBToVisit.erase(BB);

  // Recurse into successors.
  for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
       bbi != bbe; ++bbi) {
    recurseBasicBlock(*bbi);
  }
}
Example #11
0
/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it.  However, the specified instruction has multiple
/// uses.  Given this, it may actually increase register pressure to fold it
/// into the load.  For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case.  This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  if (IgnoreProfitability) return true;
  
  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded.  Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
  
  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = 0;
  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = 0;

  // If folding this instruction (and it's subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (BaseReg == 0 && ScaledReg == 0)
    return true;

  // If all uses of this instruction are ultimately load/store/inlineasm's,
  // check to see if their addressing modes will include this instruction.  If
  // so, we can fold it into all uses, so it doesn't matter if it has multiple
  // uses.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
    return false;  // Has a non-memory, non-foldable use!
  
  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these uses and see if they could
  // *actually* fold the instruction.
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;
    
    // Get the access type of this use.  If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    if (!Address->getType()->isPointerTy())
      return false;
    Type *AddressAccessTy =
      cast<PointerType>(Address->getType())->getElementType();
    
    // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
    // *actually* cover the shared instruction.
    ExtAddrMode Result;
    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
                                  MemoryInst, Result);
    Matcher.IgnoreProfitability = true;
    bool Success = Matcher.MatchAddr(Address, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?");

    // If the match didn't cover I, then it won't be shared by it.
    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                  I) == MatchedAddrModeInsts.end())
      return false;
    
    MatchedAddrModeInsts.clear();
  }
  
  return true;
}
Example #12
0
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx = TM->getRegisterInfo()->
    getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB of uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result, we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions down
    // stream. A PHI use is expected to be the kill of its source values.
    for (MachineRegisterInfo::use_nodbg_iterator
         UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
         UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
bool
AndroidBitcodeLinker::LinkInArchive(AndroidBitcodeItem &Item) {
const StringRef &Filename = Item.getFile();

  verbose("Linking archive file '" + Filename.str() + "'");

  std::set<std::string> UndefinedSymbols;
  std::set<std::string> DefinedSymbols;
  GetAllSymbols(linker->getModule(), UndefinedSymbols, DefinedSymbols);

  // Update list
  set_union(UndefinedSymbols, GlobalUndefinedSymbols);
  set_union(DefinedSymbols, GlobalDefinedSymbols);
  set_subtract(UndefinedSymbols, DefinedSymbols);

  if (UndefinedSymbols.empty()) {
    verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
    return false;  // No need to link anything in!
  }

  std::string ErrMsg;
  std::unique_ptr<Archive> AutoArch(
    Archive::OpenAndLoadSymbols(Filename, Config.getContext(), &ErrMsg));

  Archive* arch = AutoArch.get();

  // possible empty archive?
  if (!arch) {
    return false;
  }

  if (!arch->isBitcodeArchive()) {
    Item.setNative(true);
    if (Config.isLinkNativeBinary()) {
      return false;
    }
    else {
      return error("Cannot link native binaries with bitcode" + Filename.str());
    }
  }

  std::set<std::string> NotDefinedByArchive;

  std::set<std::string> CurrentlyUndefinedSymbols;

  do {
    CurrentlyUndefinedSymbols = UndefinedSymbols;

    SmallVector<Module*, 16> Modules;
    if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
      return error("Cannot find symbols in '" + Filename.str() +
                   "': " + ErrMsg);

    if (Modules.empty())
      break;

    NotDefinedByArchive.insert(UndefinedSymbols.begin(),
        UndefinedSymbols.end());

    for (SmallVectorImpl<Module*>::iterator I=Modules.begin(), E=Modules.end();
         I != E; ++I) {

      Module* aModule = *I;
      if (aModule != NULL) {
        if (std::error_code ec = aModule->materializeAll())
          return error("Could not load a module: " + ec.message());

        verbose("  Linking in module: " + aModule->getModuleIdentifier());

        // Link it in
        if (linker->linkInModule(aModule))
          return error("Cannot link in module '" +
                       aModule->getModuleIdentifier() + "'");
      }
    }

    GetAllSymbols(linker->getModule(), UndefinedSymbols, DefinedSymbols);

    set_subtract(UndefinedSymbols, NotDefinedByArchive);

    if (UndefinedSymbols.empty())
      break;
  } while (CurrentlyUndefinedSymbols != UndefinedSymbols);

  return false;
}
Example #14
0
// This is the marker algorithm from "Simple and Efficient Construction of
// Static Single Assignment Form"
// The simple, non-marker algorithm places phi nodes at any join
// Here, we place markers, and only place phi nodes if they end up necessary.
// They are only necessary if they break a cycle (IE we recursively visit
// ourselves again), or we discover, while getting the value of the operands,
// that there are two or more definitions needing to be merged.
// This still will leave non-minimal form in the case of irreducible control
// flow, where phi nodes may be in cycles with themselves, but unnecessary.
MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
    BasicBlock *BB,
    DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
  // First, do a cache lookup. Without this cache, certain CFG structures
  // (like a series of if statements) take exponential time to visit.
  auto Cached = CachedPreviousDef.find(BB);
  if (Cached != CachedPreviousDef.end()) {
    return Cached->second;
  }

  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    // Single predecessor case, just recurse, we can only have one definition.
    MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
    CachedPreviousDef.insert({BB, Result});
    return Result;
  }

  if (VisitedBlocks.count(BB)) {
    // We hit our node again, meaning we had a cycle, we must insert a phi
    // node to break it so we have an operand. The only case this will
    // insert useless phis is if we have irreducible control flow.
    MemoryAccess *Result = MSSA->createMemoryPhi(BB);
    CachedPreviousDef.insert({BB, Result});
    return Result;
  }

  if (VisitedBlocks.insert(BB).second) {
    // Mark us visited so we can detect a cycle
    SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;

    // Recurse to get the values in our predecessors for placement of a
    // potential phi node. This will insert phi nodes if we cycle in order to
    // break the cycle and have an operand.
    for (auto *Pred : predecessors(BB))
      PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));

    // Now try to simplify the ops to avoid placing a phi.
    // This may return null if we never created a phi yet, that's okay
    MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));

    // See if we can avoid the phi by simplifying it.
    auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
    // If we couldn't simplify, we may have to create a phi
    if (Result == Phi) {
      if (!Phi)
        Phi = MSSA->createMemoryPhi(BB);

      // See if the existing phi operands match what we need.
      // Unlike normal SSA, we only allow one phi node per block, so we can't just
      // create a new one.
      if (Phi->getNumOperands() != 0) {
        // FIXME: Figure out whether this is dead code and if so remove it.
        if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
          // These will have been filled in by the recursive read we did above.
          std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
          std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
        }
      } else {
        unsigned i = 0;
        for (auto *Pred : predecessors(BB))
          Phi->addIncoming(&*PhiOps[i++], Pred);
        InsertedPHIs.push_back(Phi);
      }
      Result = Phi;
    }

    // Set ourselves up for the next variable by resetting visited state.
    VisitedBlocks.erase(BB);
    CachedPreviousDef.insert({BB, Result});
    return Result;
  }
  llvm_unreachable("Should have hit one of the three cases above");
}
Example #15
0
void SuperBlock::constructSuperBlocks(Function& F) {
    superBlocks.clear();

    // key: execution count, value: unvisited BBs
    map<double, list<BasicBlock*> >* execBBs =
        new map<double, list<BasicBlock*> >();

    // stores the set of unvisited BBs
    set<BasicBlock*>* unvisitedBBs =
        new set<BasicBlock*>();

    // stores the set of backedges described as (from, to) pair
    set<pair<const BasicBlock*, const BasicBlock*> >* backEdges =
        new set<pair<const BasicBlock*, const BasicBlock*> >();

    // retrieve all backedges contained in this current function
    SmallVector<pair<const BasicBlock*, const BasicBlock*>, 32> backEdgesVector;

    // transfer everything from backEdgesVector to backEdges
    backEdges->insert(backEdgesVector.begin(), backEdgesVector.end());

    // insert all the BBs in this function into set of unvisitedBBs
    for (Function::iterator funcIter = F.begin(), funcIterEnd = F.end();
            funcIter != funcIterEnd; ++funcIter) {
        // gets number of instructions in this BB
        originalCodeSize += funcIter->size();
        currCodeSize += funcIter->size();

        double execCount = PI->getExecutionCount(funcIter);
        map<double, list<BasicBlock*> >::iterator it = execBBs->find(execCount);
        if (it == execBBs->end()) {
            list<BasicBlock*> arg(1, funcIter);
            execBBs->insert(pair<double, list<BasicBlock*> >(execCount, arg));
        } else {
            it->second.push_back(funcIter);
        }

        unvisitedBBs->insert(funcIter);
    }

    // traverse each BB in descending order of their execution counts
    for (map<double, list<BasicBlock*> >::reverse_iterator arg_b =
                execBBs->rbegin(), arg_e = execBBs->rend(); arg_b != arg_e; ++arg_b) {
        for (list<BasicBlock*>::iterator cur_b = arg_b->second.begin(),
                cur_e = arg_b->second.end(); cur_b != cur_e; ++cur_b) {
            if (unvisitedBBs->find(*cur_b) == unvisitedBBs->end()) {
                continue;
            }

            // use this current BB as seed and remove from set
            unvisitedBBs->erase(*cur_b);

            list<BasicBlock*> trace;
            list<BasicBlock*>::iterator traceIter;

            trace.push_back(*cur_b);

            // run trace selection algorithm
            BasicBlock* currBB = *cur_b;
            BasicBlock* nextBB;
            double cum_prob = 1;

            // grow trace forward
            while (1) {
                nextBB = bestSuccessor(currBB, cum_prob, unvisitedBBs, backEdges);
                if (nextBB == NULL) {
                    break;
                }

                trace.push_back(nextBB);
                unvisitedBBs->erase(nextBB);
                currBB = nextBB;
            }

            // start creating backwards from current BB again
            currBB = *cur_b;

            // grow trace backwards
            while (1) {
                nextBB = bestPredecessor(currBB, cum_prob, unvisitedBBs, backEdges);
                if (nextBB == NULL) {
                    break;
                }

                trace.push_front(nextBB);
                unvisitedBBs->erase(nextBB);
                currBB = nextBB;
            }

            // record our newly found superblock
            // we don't record superblocks of size 1
            if (trace.front() != trace.back()) {
                BasicBlock* head = trace.front();
                trace.pop_front();
                superBlocks[head] = trace;

                // populate partOfSuperBlock map
                partOfSuperBlock[head] = head;
                for (traceIter = trace.begin(); traceIter != trace.end(); ++traceIter) {
                    partOfSuperBlock[*traceIter] = head;
                }
            }
        }
    }

    delete execBBs;
    delete unvisitedBBs;
    delete backEdges;
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallSetVector<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
    if (isa<AllocaInst>(I))
      DeadStackObjects.insert(I);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then it's leaked by this function anyways.
    else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
      DeadStackObjects.insert(I);
  }

  // Treat byval or inalloca arguments the same, stores to them are dead at the
  // end of the function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValOrInAllocaAttr())
      DeadStackObjects.insert(AI);

  // Scan the basic block backwards
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);

      // Stores to stack values are valid candidates for removal.
      bool AllDead = true;
      for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
           E = Pointers.end(); I != E; ++I)
        if (!DeadStackObjects.count(*I)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (llvm::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI, TLI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (isa<AllocaInst>(BBI)) {
      // Remove allocas from the list of dead stack objects; there can't be
      // any references before the definition.
      DeadStackObjects.remove(BBI);
      continue;
    }

    if (CallSite CS = cast<Value>(BBI)) {
      // Remove allocation function calls from the list of dead stack objects; 
      // there can't be any references before the definition.
      if (isAllocLikeFn(BBI, TLI))
        DeadStackObjects.remove(BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      CouldRef Pred = { CS, AA };
      DeadStackObjects.remove_if(Pred);

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    AliasAnalysis::Location LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load
        break;
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't removed
      // above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }
Example #17
0
int main(int argc_, const char **argv_) {
  llvm::sys::PrintStackTraceOnErrorSignal();
  llvm::PrettyStackTraceProgram X(argc_, argv_);

  std::set<std::string> SavedStrings;
  SmallVector<const char*, 256> argv;

  ExpandArgv(argc_, argv_, argv, SavedStrings);

  // Handle -cc1 integrated tools.
  if (argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) {
    StringRef Tool = argv[1] + 4;

    if (Tool == "")
      return cc1_main(argv.data()+2, argv.data()+argv.size(), argv[0],
                      (void*) (intptr_t) GetExecutablePath);
    if (Tool == "as")
      return cc1as_main(argv.data()+2, argv.data()+argv.size(), argv[0],
                      (void*) (intptr_t) GetExecutablePath);

    // Reject unknown tools.
    llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n";
    return 1;
  }

  bool CanonicalPrefixes = true;
  for (int i = 1, size = argv.size(); i < size; ++i) {
    if (StringRef(argv[i]) == "-no-canonical-prefixes") {
      CanonicalPrefixes = false;
      break;
    }
  }

  // Handle QA_OVERRIDE_GCC3_OPTIONS and CCC_ADD_ARGS, used for editing a
  // command line behind the scenes.
  if (const char *OverrideStr = ::getenv("QA_OVERRIDE_GCC3_OPTIONS")) {
    // FIXME: Driver shouldn't take extra initial argument.
    ApplyQAOverride(argv, OverrideStr, SavedStrings);
  } else if (const char *Cur = ::getenv("CCC_ADD_ARGS")) {
    // FIXME: Driver shouldn't take extra initial argument.
    std::vector<const char*> ExtraArgs;

    for (;;) {
      const char *Next = strchr(Cur, ',');

      if (Next) {
        ExtraArgs.push_back(SaveStringInSet(SavedStrings,
                                            std::string(Cur, Next)));
        Cur = Next + 1;
      } else {
        if (*Cur != '\0')
          ExtraArgs.push_back(SaveStringInSet(SavedStrings, Cur));
        break;
      }
    }

    argv.insert(&argv[1], ExtraArgs.begin(), ExtraArgs.end());
  }

  llvm::sys::Path Path = GetExecutablePath(argv[0], CanonicalPrefixes);

  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions;
  {
    // Note that ParseDiagnosticArgs() uses the cc1 option table.
    OwningPtr<OptTable> CC1Opts(createDriverOptTable());
    unsigned MissingArgIndex, MissingArgCount;
    OwningPtr<InputArgList> Args(CC1Opts->ParseArgs(argv.begin()+1, argv.end(),
                                            MissingArgIndex, MissingArgCount));
    // We ignore MissingArgCount and the return value of ParseDiagnosticArgs.
    // Any errors that would be diagnosed here will also be diagnosed later,
    // when the DiagnosticsEngine actually exists.
    (void) ParseDiagnosticArgs(*DiagOpts, *Args);
  }
  // Now we can create the DiagnosticsEngine with a properly-filled-out
  // DiagnosticOptions instance.
  TextDiagnosticPrinter *DiagClient
    = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts);
  DiagClient->setPrefix(llvm::sys::path::filename(Path.str()));
  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());

  DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
  ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false);

  Driver TheDriver(Path.str(), llvm::sys::getDefaultTargetTriple(),
                   "a.out", Diags);

  // Attempt to find the original path used to invoke the driver, to determine
  // the installed path. We do this manually, because we want to support that
  // path being a symlink.
  {
    SmallString<128> InstalledPath(argv[0]);

    // Do a PATH lookup, if there are no directory components.
    if (llvm::sys::path::filename(InstalledPath) == InstalledPath) {
      llvm::sys::Path Tmp = llvm::sys::Program::FindProgramByName(
        llvm::sys::path::filename(InstalledPath.str()));
      if (!Tmp.empty())
        InstalledPath = Tmp.str();
    }
    llvm::sys::fs::make_absolute(InstalledPath);
    InstalledPath = llvm::sys::path::parent_path(InstalledPath);
    bool exists;
    if (!llvm::sys::fs::exists(InstalledPath.str(), exists) && exists)
      TheDriver.setInstalledDir(InstalledPath);
  }

  llvm::InitializeAllTargets();
  ParseProgName(argv, SavedStrings, TheDriver);

  // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE.
  TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS");
  if (TheDriver.CCPrintOptions)
    TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE");

  // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE.
  TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS");
  if (TheDriver.CCPrintHeaders)
    TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE");

  // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE.
  TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS");
  if (TheDriver.CCLogDiagnostics)
    TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE");

  OwningPtr<Compilation> C(TheDriver.BuildCompilation(argv));
  int Res = 0;
  SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
  if (C.get())
    Res = TheDriver.ExecuteCompilation(*C, FailingCommands);

  // Force a crash to test the diagnostics.
  if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) {
    Diags.Report(diag::err_drv_force_crash) << "FORCE_CLANG_DIAGNOSTICS_CRASH";
    const Command *FailingCommand = 0;
    FailingCommands.push_back(std::make_pair(-1, FailingCommand));
  }

  for (SmallVectorImpl< std::pair<int, const Command *> >::iterator it =
         FailingCommands.begin(), ie = FailingCommands.end(); it != ie; ++it) {
    int CommandRes = it->first;
    const Command *FailingCommand = it->second;
    if (!Res)
      Res = CommandRes;

    // If result status is < 0, then the driver command signalled an error.
    // If result status is 70, then the driver command reported a fatal error.
    // In these cases, generate additional diagnostic information if possible.
    if (CommandRes < 0 || CommandRes == 70) {
      TheDriver.generateCompilationDiagnostics(*C, FailingCommand);
      break;
    }
  }

  // If any timers were active but haven't been destroyed yet, print their
  // results now.  This happens in -disable-free mode.
  llvm::TimerGroup::printAll(llvm::errs());
  
  llvm::llvm_shutdown();

#ifdef _WIN32
  // Exit status should not be negative on Win32, unless abnormal termination.
  // Once abnormal termiation was caught, negative status should not be
  // propagated.
  if (Res < 0)
    Res = 1;
#endif

  // If we have multiple failing commands, we return the result of the first
  // failing command.
  return Res;
}
Example #18
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecesssary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
/// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
/// iterations before branching into the unrolled loop.  UnrollLoop will not
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                      bool AllowRuntime, bool AllowExpensiveTripCount,
                      unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
                      DominatorTree *DT, AssumptionCache *AC,
                      bool PreserveLCSSA) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();

  // Go through all exits of L and see if there are any phi-nodes there. We just
  // conservatively assume that they're inserted to preserve LCSSA form, which
  // means that complete unrolling might break this form. We need to either fix
  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
  // now we just recompute LCSSA for the outer loop, but it should be possible
  // to fix it in-place.
  bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
      std::any_of(ExitBlocks.begin(), ExitBlocks.end(),
                  [&](BasicBlock *BB) { return isa<PHINode>(BB->begin()); });

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  // Loops containing convergent instructions must have a count that divides
  // their TripMultiple.
  DEBUG(
      {
        bool HasConvergent = false;
        for (auto &BB : L->blocks())
          for (auto &I : *BB)
            if (auto CS = CallSite(&I))
              HasConvergent |= CS.isConvergent();
        assert((!HasConvergent || TripMultiple % Count == 0) &&
               "Unroll count must divide trip multiple if loop contains a "
               "convergent operation.");
      });
Example #19
0
/// UnswitchNontrivialCondition - We determined that the loop is profitable 
/// to unswitch when LIC equal Val.  Split it into loop versions and test the 
/// condition outside of either loop.  Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, 
                                               Loop *L) {
  Function *F = loopHeader->getParent();
  DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
        << " blocks] in Function " << F->getName()
        << " when '" << *Val << "' == " << *LIC << "\n");

  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  LoopBlocks.clear();
  NewBlocks.clear();

  // First step, split the preheader and exit blocks, and add these blocks to
  // the LoopBlocks list.
  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
  LoopBlocks.push_back(NewPreheader);

  // We want the loop to come after the preheader, but before the exit blocks.
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Split all of the edges from inside the loop to their exit blocks.  Update
  // the appropriate Phi nodes as we do so.
  SplitExitEdges(L, ExitBlocks);

  // The exit blocks may have been changed due to edge splitting, recompute.
  ExitBlocks.clear();
  L->getUniqueExitBlocks(ExitBlocks);

  // Add exit blocks to the loop blocks.
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());

  // Next step, clone all of the basic blocks that make up the loop (including
  // the loop preheader and exit blocks), keeping track of the mapping between
  // the instructions and blocks.
  NewBlocks.reserve(LoopBlocks.size());
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
  }

  // Splice the newly inserted blocks into the function right before the
  // original preheader.
  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                NewBlocks[0], F->end());

  // Now we create the new Loop object for the versioned loop.
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
  Loop *ParentLoop = L->getParentLoop();
  if (ParentLoop) {
    // Make sure to add the cloned preheader and exit blocks to the parent loop
    // as well.
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
  }
  
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
    // The new exit block should be in the same loop as the old one.
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
    
    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);

    // If the successor of the exit block had PHI nodes, add an entry for
    // NewExit.
    PHINode *PN;
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PN = cast<PHINode>(I);
      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
      ValueToValueMapTy::iterator It = VMap.find(V);
      if (It != VMap.end()) V = It->second;
      PN->addIncoming(V, NewExit);
    }
  }

  // Rewrite the code to refer to itself.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
      RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
  
  // Rewrite the original preheader to select between versions of the loop.
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
         "Preheader splitting did not work correctly!");

  // Emit the new branch that selects between the two versions of this loop.
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
  LPM->deleteSimpleAnalysisValue(OldBR, L);
  OldBR->eraseFromParent();

  LoopProcessWorklist.push_back(NewLoop);
  redoLoop = true;

  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody
  // deletes the instruction (for example by simplifying a PHI that feeds into
  // the condition that we're unswitching on), we don't rewrite the second
  // iteration.
  WeakVH LICHandle(LIC);
  
  // Now we rewrite the original code to know that the condition is true and the
  // new code to know that the condition is false.
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);

  // It's possible that simplifying one loop could cause the other to be
  // changed to another value or a constant.  If its a constant, don't simplify
  // it.
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
      LICHandle && !isa<Constant>(LICHandle))
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
Example #20
0
// A soft instruction can be changed to work in other domains given by mask.
void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
  // Bitmask of available domains for this instruction after taking collapsed
  // operands into account.
  unsigned available = mask;

  // Scan the explicit use operands for incoming domains.
  SmallVector<int, 4> used;
  if (LiveRegs)
    for (unsigned i = mi->getDesc().getNumDefs(),
                  e = mi->getDesc().getNumOperands(); i != e; ++i) {
      MachineOperand &mo = mi->getOperand(i);
      if (!mo.isReg()) continue;
      for (int rx : regIndices(mo.getReg())) {
        DomainValue *dv = LiveRegs[rx].Value;
        if (dv == nullptr)
          continue;
        // Bitmask of domains that dv and available have in common.
        unsigned common = dv->getCommonDomains(available);
        // Is it possible to use this collapsed register for free?
        if (dv->isCollapsed()) {
          // Restrict available domains to the ones in common with the operand.
          // If there are no common domains, we must pay the cross-domain
          // penalty for this operand.
          if (common) available = common;
        } else if (common)
          // Open DomainValue is compatible, save it for merging.
          used.push_back(rx);
        else
          // Open DomainValue is not compatible with instruction. It is useless
          // now.
          kill(rx);
      }
    }

  // If the collapsed operands force a single domain, propagate the collapse.
  if (isPowerOf2_32(available)) {
    unsigned domain = countTrailingZeros(available);
    TII->setExecutionDomain(mi, domain);
    visitHardInstr(mi, domain);
    return;
  }

  // Kill off any remaining uses that don't match available, and build a list of
  // incoming DomainValues that we want to merge.
  SmallVector<LiveReg, 4> Regs;
  for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
    int rx = *i;
    assert(LiveRegs && "no space allocated for live registers");
    const LiveReg &LR = LiveRegs[rx];
    // This useless DomainValue could have been missed above.
    if (!LR.Value->getCommonDomains(available)) {
      kill(rx);
      continue;
    }
    // Sorted insertion.
    bool Inserted = false;
    for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end();
           i != e && !Inserted; ++i) {
      if (LR.Def < i->Def) {
        Inserted = true;
        Regs.insert(i, LR);
      }
    }
    if (!Inserted)
      Regs.push_back(LR);
  }

  // doms are now sorted in order of appearance. Try to merge them all, giving
  // priority to the latest ones.
  DomainValue *dv = nullptr;
  while (!Regs.empty()) {
    if (!dv) {
      dv = Regs.pop_back_val().Value;
      // Force the first dv to match the current instruction.
      dv->AvailableDomains = dv->getCommonDomains(available);
      assert(dv->AvailableDomains && "Domain should have been filtered");
      continue;
    }

    DomainValue *Latest = Regs.pop_back_val().Value;
    // Skip already merged values.
    if (Latest == dv || Latest->Next)
      continue;
    if (merge(dv, Latest))
      continue;

    // If latest didn't merge, it is useless now. Kill all registers using it.
    for (int i : used) {
      assert(LiveRegs && "no space allocated for live registers");
      if (LiveRegs[i].Value == Latest)
        kill(i);
    }
  }

  // dv is the DomainValue we are going to use for this instruction.
  if (!dv) {
    dv = alloc();
    dv->AvailableDomains = available;
  }
  dv->Instrs.push_back(mi);

  // Finally set all defs and non-collapsed uses to dv. We must iterate through
  // all the operators, including imp-def ones.
  for (MachineInstr::mop_iterator ii = mi->operands_begin(),
                                  ee = mi->operands_end();
                                  ii != ee; ++ii) {
    MachineOperand &mo = *ii;
    if (!mo.isReg()) continue;
    for (int rx : regIndices(mo.getReg())) {
      if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
        kill(rx);
        setLiveReg(rx, dv);
      }
    }
  }
}
Example #21
0
void Diagnostic::
FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
                 SmallVectorImpl<char> &OutStr) const {

  /// FormattedArgs - Keep track of all of the arguments formatted by
  /// ConvertArgToString and pass them into subsequent calls to
  /// ConvertArgToString, allowing the implementation to avoid redundancies in
  /// obvious cases.
  SmallVector<DiagnosticsEngine::ArgumentValue, 8> FormattedArgs;

  /// QualTypeVals - Pass a vector of arrays so that QualType names can be
  /// compared to see if more information is needed to be printed.
  SmallVector<intptr_t, 2> QualTypeVals;
  SmallVector<char, 64> Tree;

  for (unsigned i = 0, e = getNumArgs(); i < e; ++i)
    if (getArgKind(i) == DiagnosticsEngine::ak_qualtype)
      QualTypeVals.push_back(getRawArg(i));

  while (DiagStr != DiagEnd) {
    if (DiagStr[0] != '%') {
      // Append non-%0 substrings to Str if we have one.
      const char *StrEnd = std::find(DiagStr, DiagEnd, '%');
      OutStr.append(DiagStr, StrEnd);
      DiagStr = StrEnd;
      continue;
    } else if (ispunct(DiagStr[1])) {
      OutStr.push_back(DiagStr[1]);  // %% -> %.
      DiagStr += 2;
      continue;
    }

    // Skip the %.
    ++DiagStr;

    // This must be a placeholder for a diagnostic argument.  The format for a
    // placeholder is one of "%0", "%modifier0", or "%modifier{arguments}0".
    // The digit is a number from 0-9 indicating which argument this comes from.
    // The modifier is a string of digits from the set [-a-z]+, arguments is a
    // brace enclosed string.
    const char *Modifier = 0, *Argument = 0;
    unsigned ModifierLen = 0, ArgumentLen = 0;

    // Check to see if we have a modifier.  If so eat it.
    if (!isdigit(DiagStr[0])) {
      Modifier = DiagStr;
      while (DiagStr[0] == '-' ||
             (DiagStr[0] >= 'a' && DiagStr[0] <= 'z'))
        ++DiagStr;
      ModifierLen = DiagStr-Modifier;

      // If we have an argument, get it next.
      if (DiagStr[0] == '{') {
        ++DiagStr; // Skip {.
        Argument = DiagStr;

        DiagStr = ScanFormat(DiagStr, DiagEnd, '}');
        assert(DiagStr != DiagEnd && "Mismatched {}'s in diagnostic string!");
        ArgumentLen = DiagStr-Argument;
        ++DiagStr;  // Skip }.
      }
    }

    assert(isdigit(*DiagStr) && "Invalid format for argument in diagnostic");
    unsigned ArgNo = *DiagStr++ - '0';

    // Only used for type diffing.
    unsigned ArgNo2 = ArgNo;

    DiagnosticsEngine::ArgumentKind Kind = getArgKind(ArgNo);
    if (Kind == DiagnosticsEngine::ak_qualtype &&
        ModifierIs(Modifier, ModifierLen, "diff")) {
      Kind = DiagnosticsEngine::ak_qualtype_pair;
      assert(*DiagStr == ',' && isdigit(*(DiagStr + 1)) &&
             "Invalid format for diff modifier");
      ++DiagStr;  // Comma.
      ArgNo2 = *DiagStr++ - '0';
      assert(getArgKind(ArgNo2) == DiagnosticsEngine::ak_qualtype &&
             "Second value of type diff must be a qualtype");
    }
    
    switch (Kind) {
    // ---- STRINGS ----
    case DiagnosticsEngine::ak_std_string: {
      const std::string &S = getArgStdStr(ArgNo);
      assert(ModifierLen == 0 && "No modifiers for strings yet");
      OutStr.append(S.begin(), S.end());
      break;
    }
    case DiagnosticsEngine::ak_c_string: {
      const char *S = getArgCStr(ArgNo);
      assert(ModifierLen == 0 && "No modifiers for strings yet");

      // Don't crash if get passed a null pointer by accident.
      if (!S)
        S = "(null)";

      OutStr.append(S, S + strlen(S));
      break;
    }
    // ---- INTEGERS ----
    case DiagnosticsEngine::ak_sint: {
      int Val = getArgSInt(ArgNo);

      if (ModifierIs(Modifier, ModifierLen, "select")) {
        HandleSelectModifier(*this, (unsigned)Val, Argument, ArgumentLen,
                             OutStr);
      } else if (ModifierIs(Modifier, ModifierLen, "s")) {
        HandleIntegerSModifier(Val, OutStr);
      } else if (ModifierIs(Modifier, ModifierLen, "plural")) {
        HandlePluralModifier(*this, (unsigned)Val, Argument, ArgumentLen,
                             OutStr);
      } else if (ModifierIs(Modifier, ModifierLen, "ordinal")) {
        HandleOrdinalModifier((unsigned)Val, OutStr);
      } else {
        assert(ModifierLen == 0 && "Unknown integer modifier");
        llvm::raw_svector_ostream(OutStr) << Val;
      }
      break;
    }
    case DiagnosticsEngine::ak_uint: {
      unsigned Val = getArgUInt(ArgNo);

      if (ModifierIs(Modifier, ModifierLen, "select")) {
        HandleSelectModifier(*this, Val, Argument, ArgumentLen, OutStr);
      } else if (ModifierIs(Modifier, ModifierLen, "s")) {
        HandleIntegerSModifier(Val, OutStr);
      } else if (ModifierIs(Modifier, ModifierLen, "plural")) {
        HandlePluralModifier(*this, (unsigned)Val, Argument, ArgumentLen,
                             OutStr);
      } else if (ModifierIs(Modifier, ModifierLen, "ordinal")) {
        HandleOrdinalModifier(Val, OutStr);
      } else {
        assert(ModifierLen == 0 && "Unknown integer modifier");
        llvm::raw_svector_ostream(OutStr) << Val;
      }
      break;
    }
    // ---- NAMES and TYPES ----
    case DiagnosticsEngine::ak_identifierinfo: {
      const IdentifierInfo *II = getArgIdentifier(ArgNo);
      assert(ModifierLen == 0 && "No modifiers for strings yet");

      // Don't crash if get passed a null pointer by accident.
      if (!II) {
        const char *S = "(null)";
        OutStr.append(S, S + strlen(S));
        continue;
      }

      llvm::raw_svector_ostream(OutStr) << '\'' << II->getName() << '\'';
      break;
    }
    case DiagnosticsEngine::ak_qualtype:
    case DiagnosticsEngine::ak_declarationname:
    case DiagnosticsEngine::ak_nameddecl:
    case DiagnosticsEngine::ak_nestednamespec:
    case DiagnosticsEngine::ak_declcontext:
      getDiags()->ConvertArgToString(Kind, getRawArg(ArgNo),
                                     Modifier, ModifierLen,
                                     Argument, ArgumentLen,
                                     FormattedArgs.data(), FormattedArgs.size(),
                                     OutStr, QualTypeVals);
      break;
    case DiagnosticsEngine::ak_qualtype_pair:
      // Create a struct with all the info needed for printing.
      TemplateDiffTypes TDT;
      TDT.FromType = getRawArg(ArgNo);
      TDT.ToType = getRawArg(ArgNo2);
      TDT.ElideType = getDiags()->ElideType;
      TDT.ShowColors = getDiags()->ShowColors;
      TDT.TemplateDiffUsed = false;
      intptr_t val = reinterpret_cast<intptr_t>(&TDT);

      const char *ArgumentEnd = Argument + ArgumentLen;
      const char *Pipe = ScanFormat(Argument, ArgumentEnd, '|');

      // Print the tree.  If this diagnostic already has a tree, skip the
      // second tree.
      if (getDiags()->PrintTemplateTree && Tree.empty()) {
        TDT.PrintFromType = true;
        TDT.PrintTree = true;
        getDiags()->ConvertArgToString(Kind, val,
                                       Modifier, ModifierLen,
                                       Argument, ArgumentLen,
                                       FormattedArgs.data(),
                                       FormattedArgs.size(),
                                       Tree, QualTypeVals);
        // If there is no tree information, fall back to regular printing.
        if (!Tree.empty()) {
          FormatDiagnostic(Pipe + 1, ArgumentEnd, OutStr);
          break;
        }
      }

      // Non-tree printing, also the fall-back when tree printing fails.
      // The fall-back is triggered when the types compared are not templates.
      const char *FirstDollar = ScanFormat(Argument, ArgumentEnd, '$');
      const char *SecondDollar = ScanFormat(FirstDollar + 1, ArgumentEnd, '$');

      // Append before text
      FormatDiagnostic(Argument, FirstDollar, OutStr);

      // Append first type
      TDT.PrintTree = false;
      TDT.PrintFromType = true;
      getDiags()->ConvertArgToString(Kind, val,
                                     Modifier, ModifierLen,
                                     Argument, ArgumentLen,
                                     FormattedArgs.data(), FormattedArgs.size(),
                                     OutStr, QualTypeVals);
      if (!TDT.TemplateDiffUsed)
        FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype,
                                               TDT.FromType));

      // Append middle text
      FormatDiagnostic(FirstDollar + 1, SecondDollar, OutStr);

      // Append second type
      TDT.PrintFromType = false;
      getDiags()->ConvertArgToString(Kind, val,
                                     Modifier, ModifierLen,
                                     Argument, ArgumentLen,
                                     FormattedArgs.data(), FormattedArgs.size(),
                                     OutStr, QualTypeVals);
      if (!TDT.TemplateDiffUsed)
        FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype,
                                               TDT.ToType));

      // Append end text
      FormatDiagnostic(SecondDollar + 1, Pipe, OutStr);
      break;
    }
    
    // Remember this argument info for subsequent formatting operations.  Turn
    // std::strings into a null terminated string to make it be the same case as
    // all the other ones.
    if (Kind == DiagnosticsEngine::ak_qualtype_pair)
      continue;
    else if (Kind != DiagnosticsEngine::ak_std_string)
      FormattedArgs.push_back(std::make_pair(Kind, getRawArg(ArgNo)));
    else
      FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_c_string,
                                        (intptr_t)getArgStdStr(ArgNo).c_str()));
    
  }

  // Append the type tree to the end of the diagnostics.
  OutStr.append(Tree.begin(), Tree.end());
}
Example #22
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecesssary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
/// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
/// iterations before branching into the unrolled loop.  UnrollLoop will not
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, bool AllowExpensiveTripCount,
                      unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
                      DominatorTree *DT, AssumptionCache *AC,
                      bool PreserveLCSSA) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  // Loops with indirectbr cannot be cloned.
  if (!L->isSafeToClone()) {
    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  Loop *ParentL = L->getParentLoop();
  bool AllExitsAreInsideParentLoop = !ParentL ||
      std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                  [&](BasicBlock *BB) { return ParentL->contains(BB); });

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount &&
      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
                               PreserveLCSSA))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  // Report the unrolling decision.
  DebugLoc LoopLoc = L->getStartLoc();
  Function *F = Header->getParent();
  LLVMContext &Ctx = F->getContext();

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                           Twine("completely unrolled loop with ") +
                               Twine(TripCount) + " iterations");
  } else {
    auto EmitDiag = [&](const Twine &T) {
      emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
                             "unrolled loop by a factor of " + Twine(Count) +
                                 T);
    };

    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
      EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
      EmitDiag(" with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;
    SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
    NewLoops[L] = L;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Tell LI about New.
      if (*BB == Header) {
        assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
        L->addBasicBlockToLoop(New, *LI);
      } else {
        // Figure out which loop New is in.
        const Loop *OldLoop = LI->getLoopFor(*BB);
        assert(OldLoop && "Should (at least) be in the loop being unrolled!");

        Loop *&NewLoop = NewLoops[OldLoop];
        if (!NewLoop) {
          // Found a new sub-loop.
          assert(*BB == OldLoop->getHeader() &&
                 "Header should be first in RPO");

          Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
          assert(NewLoopParent &&
                 "Expected parent loop before sub-loop in RPO");
          NewLoop = new Loop;
          NewLoopParent->addChildLoop(NewLoop);

          // Forget the old loop, since its inputs may have changed.
          if (SE)
            SE->forgetLoop(OldLoop);
        }
        NewLoop->addBasicBlockToLoop(New, *LI);
      }

      if (*BB == Header)
        // Loop over all of the PHI nodes in the block, changing them to use
        // the incoming values from the previous block.
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(&*I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll) {
      if (j == 0)
        Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<Loop *, 4> ForgottenLoops;
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE,
                                                      ForgottenLoops))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: We could register any cloned assumptions instead of clearing the
  // whole function's cache.
  AC->clear();

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (DT)
    DT->recalculate(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, DT, LI, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
              dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const DataLayout &DL = Header->getModule()->getDataLayout();
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = &*I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst, DL))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;

  Loop *OuterL = L->getParentLoop();
  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->updateUnloop(L);;

  // If we have a pass and a DominatorTree we should re-simplify impacted loops
  // to ensure subsequent analyses can rely on this form. We want to simplify
  // at least one layer outside of the loop that was unrolled so that any
  // changes to the parent loop exposed by the unrolling are considered.
  if (DT) {
    if (!OuterL && !CompletelyUnroll)
      OuterL = L;
    if (OuterL) {
      bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);

      // LCSSA must be performed on the outermost affected loop. The unrolled
      // loop's last loop latch is guaranteed to be in the outermost loop after
      // LoopInfo's been updated by updateUnloop.
      Loop *LatchLoop = LI->getLoopFor(Latches.back());
      if (!OuterL->contains(LatchLoop))
        while (OuterL->getParentLoop() != LatchLoop)
          OuterL = OuterL->getParentLoop();

      if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified))
        formLCSSARecursively(*OuterL, *DT, LI, SE);
      else
        assert(OuterL->isLCSSAForm(*DT) &&
               "Loops should be in LCSSA form after loop-unroll.");
    }
  }

  return true;
}
Example #23
0
/// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled,
/// this routine is called to collect the tokens for an MS asm statement.
///
/// [MS]  ms-asm-statement:
///         ms-asm-block
///         ms-asm-block ms-asm-statement
///
/// [MS]  ms-asm-block:
///         '__asm' ms-asm-line '\n'
///         '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt]
///
/// [MS]  ms-asm-instruction-block
///         ms-asm-line
///         ms-asm-line '\n' ms-asm-instruction-block
///
StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
  SourceManager &SrcMgr = PP.getSourceManager();
  SourceLocation EndLoc = AsmLoc;
  SmallVector<Token, 4> AsmToks;

  bool SingleLineMode = true;
  unsigned BraceNesting = 0;
  unsigned short savedBraceCount = BraceCount;
  bool InAsmComment = false;
  FileID FID;
  unsigned LineNo = 0;
  unsigned NumTokensRead = 0;
  SmallVector<SourceLocation, 4> LBraceLocs;
  bool SkippedStartOfLine = false;

  if (Tok.is(tok::l_brace)) {
    // Braced inline asm: consume the opening brace.
    SingleLineMode = false;
    BraceNesting = 1;
    EndLoc = ConsumeBrace();
    LBraceLocs.push_back(EndLoc);
    ++NumTokensRead;
  } else {
    // Single-line inline asm; compute which line it is on.
    std::pair<FileID, unsigned> ExpAsmLoc =
        SrcMgr.getDecomposedExpansionLoc(EndLoc);
    FID = ExpAsmLoc.first;
    LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second);
    LBraceLocs.push_back(SourceLocation());
  }

  SourceLocation TokLoc = Tok.getLocation();
  do {
    // If we hit EOF, we're done, period.
    if (isEofOrEom())
      break;

    if (!InAsmComment && Tok.is(tok::l_brace)) {
      // Consume the opening brace.
      SkippedStartOfLine = Tok.isAtStartOfLine();
      EndLoc = ConsumeBrace();
      BraceNesting++;
      LBraceLocs.push_back(EndLoc);
      TokLoc = Tok.getLocation();
      ++NumTokensRead;
      continue;
    } else if (!InAsmComment && Tok.is(tok::semi)) {
      // A semicolon in an asm is the start of a comment.
      InAsmComment = true;
      if (!SingleLineMode) {
        // Compute which line the comment is on.
        std::pair<FileID, unsigned> ExpSemiLoc =
            SrcMgr.getDecomposedExpansionLoc(TokLoc);
        FID = ExpSemiLoc.first;
        LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second);
      }
    } else if (SingleLineMode || InAsmComment) {
      // If end-of-line is significant, check whether this token is on a
      // new line.
      std::pair<FileID, unsigned> ExpLoc =
          SrcMgr.getDecomposedExpansionLoc(TokLoc);
      if (ExpLoc.first != FID ||
          SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) {
        // If this is a single-line __asm, we're done, except if the next
        // line begins with an __asm too, in which case we finish a comment
        // if needed and then keep processing the next line as a single
        // line __asm.
        bool isAsm = Tok.is(tok::kw_asm);
        if (SingleLineMode && !isAsm)
          break;
        // We're no longer in a comment.
        InAsmComment = false;
        if (isAsm) {
          LineNo = SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second);
          SkippedStartOfLine = Tok.isAtStartOfLine();
        }
      } else if (!InAsmComment && Tok.is(tok::r_brace)) {
        // In MSVC mode, braces only participate in brace matching and
        // separating the asm statements.  This is an intentional
        // departure from the Apple gcc behavior.
        if (!BraceNesting)
          break;
      }
    }
    if (!InAsmComment && BraceNesting && Tok.is(tok::r_brace) &&
        BraceCount == (savedBraceCount + BraceNesting)) {
      // Consume the closing brace.
      SkippedStartOfLine = Tok.isAtStartOfLine();
      EndLoc = ConsumeBrace();
      BraceNesting--;
      // Finish if all of the opened braces in the inline asm section were
      // consumed.
      if (BraceNesting == 0 && !SingleLineMode)
        break;
      else {
        LBraceLocs.pop_back();
        TokLoc = Tok.getLocation();
        ++NumTokensRead;
        continue;
      }
    }

    // Consume the next token; make sure we don't modify the brace count etc.
    // if we are in a comment.
    EndLoc = TokLoc;
    if (InAsmComment)
      PP.Lex(Tok);
    else {
      // Set the token as the start of line if we skipped the original start
      // of line token in case it was a nested brace.
      if (SkippedStartOfLine)
        Tok.setFlag(Token::StartOfLine);
      AsmToks.push_back(Tok);
      ConsumeAnyToken();
    }
    TokLoc = Tok.getLocation();
    ++NumTokensRead;
    SkippedStartOfLine = false;
  } while (1);

  if (BraceNesting && BraceCount != savedBraceCount) {
    // __asm without closing brace (this can happen at EOF).
    for (unsigned i = 0; i < BraceNesting; ++i) {
      Diag(Tok, diag::err_expected) << tok::r_brace;
      Diag(LBraceLocs.back(), diag::note_matching) << tok::l_brace;
      LBraceLocs.pop_back();
    }
    return StmtError();
  } else if (NumTokensRead == 0) {
    // Empty __asm.
    Diag(Tok, diag::err_expected) << tok::l_brace;
    return StmtError();
  }

  // Okay, prepare to use MC to parse the assembly.
  SmallVector<StringRef, 4> ConstraintRefs;
  SmallVector<Expr *, 4> Exprs;
  SmallVector<StringRef, 4> ClobberRefs;

  // We need an actual supported target.
  const llvm::Triple &TheTriple = Actions.Context.getTargetInfo().getTriple();
  llvm::Triple::ArchType ArchTy = TheTriple.getArch();
  const std::string &TT = TheTriple.getTriple();
  const llvm::Target *TheTarget = nullptr;
  bool UnsupportedArch =
      (ArchTy != llvm::Triple::x86 && ArchTy != llvm::Triple::x86_64);
  if (UnsupportedArch) {
    Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName();
  } else {
    std::string Error;
    TheTarget = llvm::TargetRegistry::lookupTarget(TT, Error);
    if (!TheTarget)
      Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << Error;
  }

  assert(!LBraceLocs.empty() && "Should have at least one location here");

  // If we don't support assembly, or the assembly is empty, we don't
  // need to instantiate the AsmParser, etc.
  if (!TheTarget || AsmToks.empty()) {
    return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, StringRef(),
                                  /*NumOutputs*/ 0, /*NumInputs*/ 0,
                                  ConstraintRefs, ClobberRefs, Exprs, EndLoc);
  }

  // Expand the tokens into a string buffer.
  SmallString<512> AsmString;
  SmallVector<unsigned, 8> TokOffsets;
  if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString))
    return StmtError();

  std::unique_ptr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT));
  std::unique_ptr<llvm::MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT));
  // Get the instruction descriptor.
  std::unique_ptr<llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo());
  std::unique_ptr<llvm::MCObjectFileInfo> MOFI(new llvm::MCObjectFileInfo());
  std::unique_ptr<llvm::MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TT, "", ""));

  llvm::SourceMgr TempSrcMgr;
  llvm::MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &TempSrcMgr);
  MOFI->InitMCObjectFileInfo(TheTriple, llvm::Reloc::Default,
                             llvm::CodeModel::Default, Ctx);
  std::unique_ptr<llvm::MemoryBuffer> Buffer =
      llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>");

  // Tell SrcMgr about this buffer, which is what the parser will pick up.
  TempSrcMgr.AddNewSourceBuffer(std::move(Buffer), llvm::SMLoc());

  std::unique_ptr<llvm::MCStreamer> Str(createNullStreamer(Ctx));
  std::unique_ptr<llvm::MCAsmParser> Parser(
      createMCAsmParser(TempSrcMgr, Ctx, *Str.get(), *MAI));

  // FIXME: init MCOptions from sanitizer flags here.
  llvm::MCTargetOptions MCOptions;
  std::unique_ptr<llvm::MCTargetAsmParser> TargetParser(
      TheTarget->createMCAsmParser(*STI, *Parser, *MII, MCOptions));

  std::unique_ptr<llvm::MCInstPrinter> IP(
      TheTarget->createMCInstPrinter(llvm::Triple(TT), 1, *MAI, *MII, *MRI));

  // Change to the Intel dialect.
  Parser->setAssemblerDialect(1);
  Parser->setTargetParser(*TargetParser.get());
  Parser->setParsingInlineAsm(true);
  TargetParser->setParsingInlineAsm(true);

  ClangAsmParserCallback Callback(*this, AsmLoc, AsmString, AsmToks,
                                  TokOffsets);
  TargetParser->setSemaCallback(&Callback);
  TempSrcMgr.setDiagHandler(ClangAsmParserCallback::DiagHandlerCallback,
                            &Callback);

  unsigned NumOutputs;
  unsigned NumInputs;
  std::string AsmStringIR;
  SmallVector<std::pair<void *, bool>, 4> OpExprs;
  SmallVector<std::string, 4> Constraints;
  SmallVector<std::string, 4> Clobbers;
  if (Parser->parseMSInlineAsm(AsmLoc.getPtrEncoding(), AsmStringIR, NumOutputs,
                               NumInputs, OpExprs, Constraints, Clobbers,
                               MII.get(), IP.get(), Callback))
    return StmtError();

  // Filter out "fpsw".  Clang doesn't accept it, and it always lists flags and
  // fpsr as clobbers.
  auto End = std::remove(Clobbers.begin(), Clobbers.end(), "fpsw");
  Clobbers.erase(End, Clobbers.end());

  // Build the vector of clobber StringRefs.
  ClobberRefs.insert(ClobberRefs.end(), Clobbers.begin(), Clobbers.end());

  // Recast the void pointers and build the vector of constraint StringRefs.
  unsigned NumExprs = NumOutputs + NumInputs;
  ConstraintRefs.resize(NumExprs);
  Exprs.resize(NumExprs);
  for (unsigned i = 0, e = NumExprs; i != e; ++i) {
    Expr *OpExpr = static_cast<Expr *>(OpExprs[i].first);
    if (!OpExpr)
      return StmtError();

    // Need address of variable.
    if (OpExprs[i].second)
      OpExpr =
          Actions.BuildUnaryOp(getCurScope(), AsmLoc, UO_AddrOf, OpExpr).get();

    ConstraintRefs[i] = StringRef(Constraints[i]);
    Exprs[i] = OpExpr;
  }

  // FIXME: We should be passing source locations for better diagnostics.
  return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, AsmStringIR,
                                NumOutputs, NumInputs, ConstraintRefs,
                                ClobberRefs, Exprs, EndLoc);
}
Example #24
0
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
/// observable behavior of the program other than finite running time.  Note
/// we do ensure that this never remove a loop that might be infinite, as doing
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
  // We can only remove the loop if there is a preheader that we can
  // branch from after removing it.
  BasicBlock* preheader = L->getLoopPreheader();
  if (!preheader)
    return false;

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->hasDedicatedExits())
    return false;

  // We can't remove loops that contain subloops.  If the subloops were dead,
  // they would already have been removed in earlier executions of this pass.
  if (L->begin() != L->end())
    return false;

  SmallVector<BasicBlock*, 4> exitingBlocks;
  L->getExitingBlocks(exitingBlocks);

  SmallVector<BasicBlock*, 4> exitBlocks;
  L->getUniqueExitBlocks(exitBlocks);

  // We require that the loop only have a single exit block.  Otherwise, we'd
  // be in the situation of needing to be able to solve statically which exit
  // block will be branched to, or trying to preserve the branching logic in
  // a loop invariant manner.
  if (exitBlocks.size() != 1)
    return false;

  // Finally, we have to check that the loop really is dead.
  bool Changed = false;
  if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
    return Changed;

  // Don't remove loops for which we can't solve the trip count.
  // They could be infinite, in which case we'd be changing program behavior.
  ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
  const SCEV *S = SE.getMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(S))
    return Changed;

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  BasicBlock* exitBlock = exitBlocks[0];

  // Because we're deleting a large chunk of code at once, the sequence in which
  // we remove things is very important to avoid invalidation issues.  Don't
  // mess with this unless you have good reason and know what you're doing.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  SE.forgetLoop(L);

  // Connect the preheader directly to the exit block.
  TerminatorInst* TI = preheader->getTerminator();
  TI->replaceUsesOfWith(L->getHeader(), exitBlock);

  // Rewrite phis in the exit block to get their inputs from
  // the preheader instead of the exiting block.
  BasicBlock* exitingBlock = exitingBlocks[0];
  BasicBlock::iterator BI = exitBlock->begin();
  while (PHINode* P = dyn_cast<PHINode>(BI)) {
    int j = P->getBasicBlockIndex(exitingBlock);
    assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
    P->setIncomingBlock(j, preheader);
    for (unsigned i = 1; i < exitingBlocks.size(); ++i)
      P->removeIncomingValue(exitingBlocks[i]);
    ++BI;
  }

  // Update the dominator tree and remove the instructions and blocks that will
  // be deleted from the reference counting scheme.
  DominatorTree& DT = getAnalysis<DominatorTree>();
  SmallVector<DomTreeNode*, 8> ChildNodes;
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI) {
    // Move all of the block's children to be children of the preheader, which
    // allows us to remove the domtree entry for the block.
    ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
    for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
         DE = ChildNodes.end(); DI != DE; ++DI) {
      DT.changeImmediateDominator(*DI, DT[preheader]);
    }

    ChildNodes.clear();
    DT.eraseNode(*LI);

    // Remove the block from the reference counting scheme, so that we can
    // delete it freely later.
    (*LI)->dropAllReferences();
  }

  // Erase the instructions and the blocks without having to worry
  // about ordering because we already dropped the references.
  // NOTE: This iteration is safe because erasing the block does not remove its
  // entry from the loop's block list.  We do that in the next section.
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI)
    (*LI)->eraseFromParent();

  // Finally, the blocks from loopinfo.  This has to happen late because
  // otherwise our loop iterators won't work.
  LoopInfo& loopInfo = getAnalysis<LoopInfo>();
  SmallPtrSet<BasicBlock*, 8> blocks;
  blocks.insert(L->block_begin(), L->block_end());
  for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
       E = blocks.end(); I != E; ++I)
    loopInfo.removeBlock(*I);

  // The last step is to inform the loop pass manager that we've
  // eliminated this loop.
  LPM.deleteLoopFromQueue(L);
  Changed = true;

  ++NumDeleted;

  return Changed;
}
Example #25
0
/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
                                            bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<Function *, 16> DeadFunctionsInComdats;

  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelegant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Removing the node for callee from the call graph and delete it.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program.  Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (F->hasComdat()) {
        DeadFunctionsInComdats.push_back(F);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Filter out the functions whose comdats remain alive.
    filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
    // Remove the rest.
    for (Function *F : DeadFunctionsInComdats)
      RemoveCGN(CG[F]);
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so.  We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}
static void createVectorVariantWrapper(llvm::Function *ScalarFunc,
                                       llvm::Function *VectorFunc,
                                       unsigned VLen,
                                       const SmallVectorImpl<ParamInfo> &Info) {
  assert(ScalarFunc->arg_size() == Info.size() &&
         "Wrong number of parameter infos");
  assert((VLen & (VLen - 1)) == 0 && "VLen must be a power-of-2");

  bool IsMasked = VectorFunc->arg_size() == ScalarFunc->arg_size() + 1;
  llvm::LLVMContext &Context = ScalarFunc->getContext();
  llvm::BasicBlock *Entry
    = llvm::BasicBlock::Create(Context, "entry", VectorFunc);
  llvm::BasicBlock *LoopCond
    = llvm::BasicBlock::Create(Context, "loop.cond", VectorFunc);
  llvm::BasicBlock *LoopBody
    = llvm::BasicBlock::Create(Context, "loop.body", VectorFunc);
  llvm::BasicBlock *MaskOn
    = IsMasked ? llvm::BasicBlock::Create(Context, "mask_on", VectorFunc) : 0;
  llvm::BasicBlock *MaskOff
    = IsMasked ? llvm::BasicBlock::Create(Context, "mask_off", VectorFunc) : 0;
  llvm::BasicBlock *LoopStep
    = llvm::BasicBlock::Create(Context, "loop.step", VectorFunc);
  llvm::BasicBlock *LoopEnd
    = llvm::BasicBlock::Create(Context, "loop.end", VectorFunc);

  llvm::Value *VectorRet = 0;
  SmallVector<llvm::Value*, 4> VectorArgs;

  // The loop counter.
  llvm::Type *IndexTy = llvm::Type::getInt32Ty(Context);
  llvm::Value *Index = 0;
  llvm::Value *Mask = 0;

  // Copy the names from the scalar args to the vector args.
  {
    llvm::Function::arg_iterator SI = ScalarFunc->arg_begin(),
                                 SE = ScalarFunc->arg_end(),
                                 VI = VectorFunc->arg_begin();
    for ( ; SI != SE; ++SI, ++VI)
      VI->setName(SI->getName());
    if (IsMasked)
      VI->setName("mask");
  }

  llvm::IRBuilder<> Builder(Entry);
  {
    if (!VectorFunc->getReturnType()->isVoidTy())
      VectorRet = Builder.CreateAlloca(VectorFunc->getReturnType());

    Index = Builder.CreateAlloca(IndexTy, 0, "index");
    Builder.CreateStore(llvm::ConstantInt::get(IndexTy, 0), Index);

    llvm::Function::arg_iterator VI = VectorFunc->arg_begin();
    for (SmallVectorImpl<ParamInfo>::const_iterator I = Info.begin(),
         IE = Info.end(); I != IE; ++I, ++VI) {
      llvm::Value *Arg = VI;
      switch (I->Kind) {
      case PK_Vector:
        assert(Arg->getType()->isVectorTy() && "Not a vector");
        assert(VLen == Arg->getType()->getVectorNumElements() &&
               "Wrong number of elements");
        break;
      case PK_LinearConst:
        Arg = buildLinearArg(Builder, VLen, Arg,
          cast<llvm::ConstantAsMetadata>(I->Step)->getValue());
        Arg->setName(VI->getName() + ".linear");
        break;
      case PK_Linear: {
        unsigned Number =
          cast<llvm::ConstantInt>(
            cast<llvm::ConstantAsMetadata>(I->Step)->getValue())->getZExtValue();
        llvm::Function::arg_iterator ArgI = VectorFunc->arg_begin();
        std::advance(ArgI, Number);
        llvm::Value *Step = ArgI;
        Arg = buildLinearArg(Builder, VLen, Arg, Step);
        Arg->setName(VI->getName() + ".linear");
      } break;
      case PK_Uniform:
        Arg = Builder.CreateVectorSplat(VLen, Arg);
        Arg->setName(VI->getName() + ".uniform");
        break;
      }
      VectorArgs.push_back(Arg);
    }

    if (IsMasked)
      Mask = buildMask(Builder, VLen, VI);

    Builder.CreateBr(LoopCond);
  }

  Builder.SetInsertPoint(LoopCond);
  {
    llvm::Value *Cond = Builder.CreateICmpULT(
        Builder.CreateLoad(Index), llvm::ConstantInt::get(IndexTy, VLen));
    Builder.CreateCondBr(Cond, LoopBody, LoopEnd);
  }

  llvm::Value *VecIndex = 0;

  Builder.SetInsertPoint(LoopBody);
  {
    VecIndex = Builder.CreateLoad(Index);
    if (IsMasked) {
      llvm::Value *ScalarMask = Builder.CreateExtractElement(Mask, VecIndex);
      Builder.CreateCondBr(ScalarMask, MaskOn, MaskOff);
    }
  }

  Builder.SetInsertPoint(IsMasked ? MaskOn : LoopBody);
  {
    // Build the argument list for the scalar function by extracting element
    // 'VecIndex' from the vector arguments.
    SmallVector<llvm::Value*, 4> ScalarArgs;
    for (SmallVectorImpl<llvm::Value*>::iterator VI = VectorArgs.begin(),
         VE = VectorArgs.end(); VI != VE; ++VI) {
      assert((*VI)->getType()->isVectorTy() && "Not a vector");
      ScalarArgs.push_back(Builder.CreateExtractElement(*VI, VecIndex));
    }

    // Call the scalar function with the extracted scalar arguments.
    llvm::Value *ScalarRet = Builder.CreateCall(ScalarFunc, ScalarArgs);

    // If the function returns a value insert the scalar return value into the
    // vector return value.
    if (VectorRet) {
      llvm::Value *V = Builder.CreateLoad(VectorRet);
      V = Builder.CreateInsertElement(V, ScalarRet, VecIndex);
      Builder.CreateStore(V, VectorRet);
    }

    Builder.CreateBr(LoopStep);
  }

  if (IsMasked) {
    Builder.SetInsertPoint(MaskOff);
    if (VectorRet) {
      llvm::Value *V = Builder.CreateLoad(VectorRet);
      llvm::Value *Zero
        = llvm::Constant::getNullValue(ScalarFunc->getReturnType());
      V = Builder.CreateInsertElement(V, Zero, VecIndex);
      Builder.CreateStore(V, VectorRet);
    }
    Builder.CreateBr(LoopStep);
  }

  Builder.SetInsertPoint(LoopStep);
  {
    // Index = Index + 1
    VecIndex = Builder.CreateAdd(VecIndex, llvm::ConstantInt::get(IndexTy, 1));
    Builder.CreateStore(VecIndex, Index);
    Builder.CreateBr(LoopCond);
  }

  Builder.SetInsertPoint(LoopEnd);
  {
    if (VectorRet)
      Builder.CreateRet(Builder.CreateLoad(VectorRet));
    else
      Builder.CreateRetVoid();
  }
}
Example #27
0
static void emitImplicitValueConstructor(SILGenFunction &gen,
                                         ConstructorDecl *ctor) {
  RegularLocation Loc(ctor);
  Loc.markAutoGenerated();
  // FIXME: Handle 'self' along with the other arguments.
  auto *paramList = ctor->getParameterList(1);
  auto *selfDecl = ctor->getImplicitSelfDecl();
  auto selfTyCan = selfDecl->getType()->getInOutObjectType();
  auto selfIfaceTyCan = selfDecl->getInterfaceType()->getInOutObjectType();
  SILType selfTy = gen.getLoweredType(selfTyCan);

  // Emit the indirect return argument, if any.
  SILValue resultSlot;
  if (selfTy.isAddressOnly(gen.SGM.M) && gen.silConv.useLoweredAddresses()) {
    auto &AC = gen.getASTContext();
    auto VD = new (AC) ParamDecl(/*IsLet*/ false, SourceLoc(), SourceLoc(),
                                 AC.getIdentifier("$return_value"),
                                 SourceLoc(),
                                 AC.getIdentifier("$return_value"), Type(),
                                 ctor);
    VD->setInterfaceType(selfIfaceTyCan);
    resultSlot = gen.F.begin()->createFunctionArgument(selfTy, VD);
  }

  // Emit the elementwise arguments.
  SmallVector<RValue, 4> elements;
  for (size_t i = 0, size = paramList->size(); i < size; ++i) {
    auto &param = paramList->get(i);

    elements.push_back(
      emitImplicitValueConstructorArg(
          gen, Loc, param->getInterfaceType()->getCanonicalType(), ctor));
  }

  emitConstructorMetatypeArg(gen, ctor);

  auto *decl = selfTy.getStructOrBoundGenericStruct();
  assert(decl && "not a struct?!");

  // If we have an indirect return slot, initialize it in-place.
  if (resultSlot) {

    auto elti = elements.begin(), eltEnd = elements.end();
    for (VarDecl *field : decl->getStoredProperties()) {
      auto fieldTy = selfTy.getFieldType(field, gen.SGM.M);
      auto &fieldTL = gen.getTypeLowering(fieldTy);
      SILValue slot = gen.B.createStructElementAddr(Loc, resultSlot, field,
                                                    fieldTL.getLoweredType().getAddressType());
      InitializationPtr init(new KnownAddressInitialization(slot));

      // An initialized 'let' property has a single value specified by the
      // initializer - it doesn't come from an argument.
      if (!field->isStatic() && field->isLet() &&
          field->getParentInitializer()) {
#ifndef NDEBUG
        auto fieldTy = decl->getDeclContext()->mapTypeIntoContext(
            field->getInterfaceType());
        assert(fieldTy->isEqual(field->getParentInitializer()->getType())
               && "Checked by sema");
#endif

        // Cleanup after this initialization.
        FullExpr scope(gen.Cleanups, field->getParentPatternBinding());
        gen.emitExprInto(field->getParentInitializer(), init.get());
        continue;
      }

      assert(elti != eltEnd && "number of args does not match number of fields");
      (void)eltEnd;
      std::move(*elti).forwardInto(gen, Loc, init.get());
      ++elti;
    }
    gen.B.createReturn(ImplicitReturnLocation::getImplicitReturnLoc(Loc),
                       gen.emitEmptyTuple(Loc));
    return;
  }

  // Otherwise, build a struct value directly from the elements.
  SmallVector<SILValue, 4> eltValues;

  auto elti = elements.begin(), eltEnd = elements.end();
  for (VarDecl *field : decl->getStoredProperties()) {
    auto fieldTy = selfTy.getFieldType(field, gen.SGM.M);
    SILValue v;

    // An initialized 'let' property has a single value specified by the
    // initializer - it doesn't come from an argument.
    if (!field->isStatic() && field->isLet() && field->getParentInitializer()) {
      // Cleanup after this initialization.
      FullExpr scope(gen.Cleanups, field->getParentPatternBinding());
      v = gen.emitRValue(field->getParentInitializer())
             .forwardAsSingleStorageValue(gen, fieldTy, Loc);
    } else {
      assert(elti != eltEnd && "number of args does not match number of fields");
      (void)eltEnd;
      v = std::move(*elti).forwardAsSingleStorageValue(gen, fieldTy, Loc);
      ++elti;
    }

    eltValues.push_back(v);
  }

  SILValue selfValue = gen.B.createStruct(Loc, selfTy, eltValues);
  gen.B.createReturn(ImplicitReturnLocation::getImplicitReturnLoc(Loc),
                     selfValue);
  return;
}
Example #28
0
/// Emit the language-specific data that _except_handler3 and 4 expect. This is
/// functionally equivalent to the __C_specific_handler table, except it is
/// indexed by state number instead of IP.
void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
  MCStreamer &OS = *Asm->OutStreamer;
  const Function *F = MF->getFunction();
  StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());

  WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F);
  emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);

  // Emit the __ehtable label that we use for llvm.x86.seh.lsda.
  MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
  OS.EmitLabel(LSDALabel);

  const Function *Per = MMI->getPersonality();
  StringRef PerName = Per->getName();
  int BaseState = -1;
  if (PerName == "_except_handler4") {
    // The LSDA for _except_handler4 starts with this struct, followed by the
    // scope table:
    //
    // struct EH4ScopeTable {
    //   int32_t GSCookieOffset;
    //   int32_t GSCookieXOROffset;
    //   int32_t EHCookieOffset;
    //   int32_t EHCookieXOROffset;
    //   ScopeTableEntry ScopeRecord[];
    // };
    //
    // Only the EHCookieOffset field appears to vary, and it appears to be the
    // offset from the final saved SP value to the retaddr.
    OS.EmitIntValue(-2, 4);
    OS.EmitIntValue(0, 4);
    // FIXME: Calculate.
    OS.EmitIntValue(9999, 4);
    OS.EmitIntValue(0, 4);
    BaseState = -2;
  }

  // Build a list of pointers to LandingPadInfos and then sort by WinEHState.
  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
  SmallVector<const LandingPadInfo *, 4> LPads;
  LPads.reserve((PadInfos.size()));
  for (const LandingPadInfo &LPInfo : PadInfos)
    LPads.push_back(&LPInfo);
  std::sort(LPads.begin(), LPads.end(),
            [](const LandingPadInfo *L, const LandingPadInfo *R) {
              return L->WinEHState < R->WinEHState;
            });

  // For each action in each lpad, emit one of these:
  // struct ScopeTableEntry {
  //   int32_t EnclosingLevel;
  //   int32_t (__cdecl *Filter)();
  //   void *HandlerOrFinally;
  // };
  //
  // The "outermost" action will use BaseState as its enclosing level. Each
  // other action will refer to the previous state as its enclosing level.
  int CurState = 0;
  for (const LandingPadInfo *LPInfo : LPads) {
    int EnclosingLevel = BaseState;
    assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 ==
               LPInfo->WinEHState &&
           "gaps in the SEH scope table");
    for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend();
         I != E; ++I) {
      const SEHHandler &Handler = *I;
      const BlockAddress *BA = Handler.RecoverBA;
      const Function *F = Handler.FilterOrFinally;
      assert(F && "cannot catch all in 32-bit SEH without filter function");
      const MCExpr *FilterOrNull =
          create32bitRef(BA ? Asm->getSymbol(F) : nullptr);
      const MCExpr *ExceptOrFinally = create32bitRef(
          BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F));

      OS.EmitIntValue(EnclosingLevel, 4);
      OS.EmitValue(FilterOrNull, 4);
      OS.EmitValue(ExceptOrFinally, 4);

      // The next state unwinds to this state.
      EnclosingLevel = CurState;
      CurState++;
    }
  }
}
Example #29
0
Function* generateFunctionWrapperWithParams(const std::string& wrapper_name, Function* f, Module* mod, std::vector<const Type*>& additionalParams, const bool inlineCall) {
    assert (f && mod);
    assert (f->getParent());

    if (f->getParent() != mod) {
        errs() << "WARNING: generateFunctionWrapper(): module '"
                << mod->getModuleIdentifier()
                << "' is not the parent of function '"
                << f->getNameStr() << "' (parent: '"
                << f->getParent()->getModuleIdentifier() << "')!\n";
    }

    // first make sure there is no function with that name in mod
    // TODO: Implement logic from LLVM tutorial that checks for matching extern
    //       declaration without body and, in this case, goes on.
    if (mod->getFunction(wrapper_name)) {
        errs() << "ERROR: generateFunctionWrapper(): Function with name '"
                << wrapper_name << "' already exists in module '"
                << mod->getModuleIdentifier() << "'!\n";
        return NULL;
    }

    // warn if f has a return value
    if (!f->getReturnType()->isVoidTy()) {
        errs() << "WARNING: generateFunctionWrapper(): target function '"
                << f->getNameStr() << "' must not have a return type (ignored)!\n";
    }

    LLVMContext& context = mod->getContext();

    IRBuilder<> builder(context);

    // determine all arguments of f
    std::vector<const Argument*> oldArgs;
    std::vector<const Type*> oldArgTypes;
    for (Function::const_arg_iterator A=f->arg_begin(), AE=f->arg_end(); A!=AE; ++A) {
        oldArgs.push_back(A);
        oldArgTypes.push_back(A->getType());
    }

    // create a struct type with a member for each argument
    const StructType* argStructType = StructType::get(context, oldArgTypes, false);


    // create function
    //const FunctionType* fType = TypeBuilder<void(void*), true>::get(context);
    std::vector<const Type*> params;
    params.push_back(PointerType::getUnqual(argStructType));

    for (std::vector<const Type*>::const_iterator it=additionalParams.begin(), E=additionalParams.end(); it!=E; ++it) {
        params.push_back(*it);
    }

    const FunctionType* fType = FunctionType::get(Type::getVoidTy(context), params, false);
    Function* wrapper = Function::Create(fType, Function::ExternalLinkage, wrapper_name, mod);

    // set name of argument
    Argument* arg_str = wrapper->arg_begin();
    arg_str->setName("arg_struct");

    // create entry block
    BasicBlock* entryBB = BasicBlock::Create(context, "entry", wrapper);
    builder.SetInsertPoint(entryBB);

    // create extractions of arguments out of the struct
    SmallVector<Value*, 8> extractedArgs;
    for (unsigned i=0, e=oldArgTypes.size(); i<e; ++i) {
        // create GEP
        std::vector<Value*> indices;
        indices.push_back(Constant::getNullValue(Type::getInt32Ty(context))); // step through pointer
        indices.push_back(ConstantInt::get(context, APInt(32, i))); // index of argument
        Value* gep = builder.CreateGEP(arg_str, indices.begin(), indices.end(), "");

        // create load
        LoadInst* load = builder.CreateLoad(gep, false, "");

        // store as argument for call to f
        extractedArgs.push_back(load);
    }

    // create the call to f
    CallInst* call = builder.CreateCall(f, extractedArgs.begin(), extractedArgs.end(), "");

    // the function returns void
    builder.CreateRetVoid();


    //wrapper->addAttribute(0, Attribute::NoUnwind); // function does not unwind stack -> why is there an index required ???
    wrapper->setDoesNotCapture(1, true); // arg ptr does not capture
    wrapper->setDoesNotAlias(1, true);   // arg ptr does not alias

    // inline call if required
    if (inlineCall) {
        InlineFunctionInfo IFI(NULL, new TargetData(mod));
        const bool success = InlineFunction(call, IFI);
        if (!success) {
            errs() << "WARNING: could not inline function call inside wrapper: " << *call << "\n";
        }
        assert (success);
    }

    //verifyFunction(*wrapper, NULL);

    return wrapper;
}
Example #30
0
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small short loops to CTR loop.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
    LLVM_DEBUG(dbgs() << "Nested loop converted\n");
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

  // Bail out if the loop has irreducible control flow.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
    return false;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
                      << (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (LI->getLoopFor(*I) != L)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // existing block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this blocks ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
                    << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Function *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Function *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}