Example #1
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
  AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();

  SmallPtrSet<Function*, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (F) SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;
  
  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function*, int>, 8> InlineHistory;

  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();
    if (!F) continue;
    
    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS(cast<Value>(I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;
        
        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
          continue;
        
        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no call sites in this SCC, exit early.
  if (CallSites.empty())
    return false;
  
  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  
  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, AA, ACT);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause indirect
    // calls to become direct calls.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;
      
      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
        DEBUG(dbgs() << "    -> Deleting dead call: "
                     << *CS.getInstruction() << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        CS.getInstruction()->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // We can only inline direct calls to non-declarations.
        if (!Callee || Callee->isDeclaration()) continue;
      
        // If this call site was obtained by inlining another function, verify
        // that the inline path for the function did not include the callee
        // itself.  If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        int InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;
        
        LLVMContext &CallerCtx = Caller->getContext();

        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();

        // If the policy determines that we should inline this function,
        // try to do so.
        if (!shouldInline(CS)) {
          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
                                       Twine(Callee->getName() +
                                             " will not be inlined into " +
                                             Caller->getName()));
          continue;
        }

        // Attempt to inline the function.
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime)) {
          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
                                       Twine(Callee->getName() +
                                             " will not be inlined into " +
                                             Caller->getName()));
          continue;
        }
        ++NumInlined;

        // Report the inline decision.
        emitOptimizationRemark(
            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
            Twine(Callee->getName() + " inlined into " + Caller->getName()));

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
               i != e; ++i) {
            Value *Ptr = InlineInfo.InlinedCalls[i];
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
          }
        }
      }
      
      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet; doing
          // so could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: "
              << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];
        
        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();
        
        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use 
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin()+CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
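The recursion guard above depends on InlineHistoryIncludes, which is not shown. Below is a minimal standalone sketch of the walk it is expected to perform, with plain standard-library types standing in for SmallVector and Function* (an assumption, not the actual helper):

#include <utility>
#include <vector>

// Each entry pairs the function that was inlined with the index of the
// entry that produced it, forming a parent chain; -1 terminates it.
typedef std::vector<std::pair<const void *, int> > InlineHistoryTy;

// Walk the chain from Id toward the root and report whether Callee
// already appears on it; if it does, inlining it again could recurse
// forever, which is exactly what the check above guards against.
static bool historyIncludes(const void *Callee, int Id,
                            const InlineHistoryTy &History) {
  while (Id != -1) {
    if (History[Id].first == Callee)
      return true;
    Id = History[Id].second;
  }
  return false;
}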
Example #2
void WinEHNumbering::processCallSite(ArrayRef<ActionHandler *> Actions,
                                     ImmutableCallSite CS) {
  int FirstMismatch = 0;
  for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
       ++FirstMismatch) {
    if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
        Actions[FirstMismatch]->getHandlerBlockOrFunc())
      break;
    delete Actions[FirstMismatch];
  }

  bool EnteringScope = (int)Actions.size() > FirstMismatch;

  // Don't recurse while we are looping over the handler stack.  Instead, defer
  // the numbering of the catch handlers until we are done popping.
  SmallVector<CatchHandler *, 4> PoppedCatches;
  for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
    if (auto *CH = dyn_cast<CatchHandler>(HandlerStack.back())) {
      PoppedCatches.push_back(CH);
    } else {
      // Delete cleanup handlers
      delete HandlerStack.back();
    }
    HandlerStack.pop_back();
  }

  // We need to create a new state number if we are exiting a try scope and we
  // will not push any more actions.
  int TryHigh = NextState - 1;
  if (!EnteringScope && !PoppedCatches.empty()) {
    createUnwindMapEntry(currentEHNumber(), nullptr);
    ++NextState;
  }

  int LastTryLowIdx = 0;
  for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
    CatchHandler *CH = PoppedCatches[I];
    if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
      int TryLow = CH->getEHState();
      auto Handlers =
          makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
      createTryBlockMapEntry(TryLow, TryHigh, Handlers);
      LastTryLowIdx = I + 1;
    }
  }

  for (CatchHandler *CH : PoppedCatches) {
    if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc()))
      calculateStateNumbers(*F);
    delete CH;
  }

  bool LastActionWasCatch = false;
  for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
    // We can reuse eh states when pushing two catches for the same invoke.
    bool CurrActionIsCatch = isa<CatchHandler>(Actions[I]);
    // FIXME: Reenable this optimization!
    if (CurrActionIsCatch && LastActionWasCatch && false) {
      Actions[I]->setEHState(currentEHNumber());
    } else {
      createUnwindMapEntry(currentEHNumber(), Actions[I]);
      Actions[I]->setEHState(NextState);
      NextState++;
      DEBUG(dbgs() << "Creating unwind map entry for: (");
      print_name(Actions[I]->getHandlerBlockOrFunc());
      DEBUG(dbgs() << ", " << currentEHNumber() << ")\n");
    }
    HandlerStack.push_back(Actions[I]);
    LastActionWasCatch = CurrActionIsCatch;
  }

  DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
  print_name(CS ? CS.getCalledValue() : nullptr);
  DEBUG(dbgs() << '\n');
}
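A toy model of the handler-stack maintenance above, assuming handlers can be compared by identity: compute the common prefix of the live stack and the incoming actions, pop everything above it, then push the rest with fresh EH states. Only the prefix computation is sketched:

#include <algorithm>
#include <cstddef>
#include <vector>

// Length of the common prefix of the live handler stack and the
// incoming action list; everything above it on the stack gets popped,
// exactly as the FirstMismatch loop above does.
static std::size_t commonPrefix(const std::vector<int> &Stack,
                                const std::vector<int> &Actions) {
  std::size_t N = std::min(Stack.size(), Actions.size());
  std::size_t I = 0;
  while (I != N && Stack[I] == Actions[I])
    ++I;
  return I;
}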
Example #3
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isLandingPad())
    return NULL;

  MachineFunction *MF = getParent();
  DebugLoc dl;  // FIXME: this is nowhere

  // We may need to update this block's terminator, but we can't do that if
  // AnalyzeBranch fails.  If this block uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return NULL;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jump to the same MBB in either case.  We have duplicate CFG edges in that
  // case that we can't handle.  Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return NULL;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  ReplaceUsesOfBlockWith(Succ, NMBB);
  updateTerminator();

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
         E = Succ->livein_end(); I != E; ++I)
    NMBB->addLiveIn(*I);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>()) {
    // Update dominator information.
    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);

    bool IsNewIDom = true;
    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
         PI != E; ++PI) {
      MachineBasicBlock *PredBB = *PI;
      if (PredBB == NMBB)
        continue;
      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
        IsNewIDom = false;
        break;
      }
    }

    // We know "this" dominates the newly created basic block.
    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);

    // If all the other predecessors of "Succ" are dominated by "Succ" itself
    // then the new block is the new immediate dominator of "Succ". Otherwise,
    // the new block doesn't dominate anything.
    if (IsNewIDom)
      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
  }

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one block or the other was not in a loop, the new block is not in a
      // loop either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
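The dominator update above follows one compact rule. Here is a hypothetical standalone restatement, where Block and the Dominates callback stand in for MachineBasicBlock and the MachineDominatorTree query:

#include <cstddef>
#include <vector>

struct Block {
  std::vector<Block *> Preds;  // CFG predecessors
};

// After splitting edge (This -> Succ) with new block NMBB, NMBB is
// Succ's immediate dominator iff every other predecessor of Succ is
// itself dominated by Succ (i.e. reaches Succ only via a back edge).
static bool newBlockIsIDom(const Block &Succ, const Block *NMBB,
                           bool (*Dominates)(const Block *, const Block *)) {
  for (std::size_t I = 0; I != Succ.Preds.size(); ++I) {
    const Block *Pred = Succ.Preds[I];
    if (Pred == NMBB)
      continue;
    if (!Dominates(&Succ, Pred))
      return false;
  }
  return true;
}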
Example #4
void cl::ParseCommandLineOptions(int argc, char **argv,
                                 const char *Overview, bool ReadResponseFiles) {
  // Process all registered options.
  SmallVector<Option*, 4> PositionalOpts;
  SmallVector<Option*, 4> SinkOpts;
  StringMap<Option*> Opts;
  GetOptionInfo(PositionalOpts, SinkOpts, Opts);

  assert((!Opts.empty() || !PositionalOpts.empty()) &&
         "No options specified!");

  // Expand response files.
  std::vector<char*> newArgv;
  if (ReadResponseFiles) {
    newArgv.push_back(strdup(argv[0]));
    ExpandResponseFiles(argc, argv, newArgv);
    argv = &newArgv[0];
    argc = static_cast<int>(newArgv.size());
  }

  // Copy the program name into ProgName, making sure not to overflow it.
  std::string ProgName = sys::Path(argv[0]).getLast();
  size_t Len = std::min(ProgName.size(), size_t(79));
  memcpy(ProgramName, ProgName.data(), Len);
  ProgramName[Len] = '\0';

  ProgramOverview = Overview;
  bool ErrorParsing = false;

  // Check out the positional arguments to collect information about them.
  unsigned NumPositionalRequired = 0;

  // Determine whether there is an unlimited number of positionals.
  bool HasUnlimitedPositionals = false;

  Option *ConsumeAfterOpt = 0;
  if (!PositionalOpts.empty()) {
    if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
      assert(PositionalOpts.size() > 1 &&
             "Cannot specify cl::ConsumeAfter without a positional argument!");
      ConsumeAfterOpt = PositionalOpts[0];
    }

    // Calculate how many positional values are _required_.
    bool UnboundedFound = false;
    for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size();
         i != e; ++i) {
      Option *Opt = PositionalOpts[i];
      if (RequiresValue(Opt))
        ++NumPositionalRequired;
      else if (ConsumeAfterOpt) {
        // ConsumeAfter cannot be combined with "optional" positional options
        // unless there is only one positional argument...
        if (PositionalOpts.size() > 2)
          ErrorParsing |=
            Opt->error("error - this positional option will never be matched, "
                       "because it does not Require a value, and a "
                       "cl::ConsumeAfter option is active!");
      } else if (UnboundedFound && !Opt->ArgStr[0]) {
        // This option does not "require" a value...  Make sure this option is
        // not specified after an option that eats all extra arguments, or this
        // one will never get any!
        //
        ErrorParsing |= Opt->error("error - option can never match, because "
                                   "another positional argument will match an "
                                   "unbounded number of values, and this option"
                                   " does not require a value!");
      }
      UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
    }
    HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt;
  }

  // PositionalVals - A vector of "positional" arguments we accumulate into
  // the process at the end.
  //
  SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals;

  // If the program has named positional arguments, and such a name has been
  // encountered, keep track of which positional argument was named.  Otherwise
  // put the positional args into the PositionalVals list...
  Option *ActivePositionalArg = 0;

  // Loop over all of the arguments... processing them.
  bool DashDashFound = false;  // Have we read '--'?
  for (int i = 1; i < argc; ++i) {
    Option *Handler = 0;
    StringRef Value;
    StringRef ArgName = "";

    // If the option list changed, this means that some command line
    // option has just been registered or deregistered.  This can occur in
    // response to things like -load, etc.  If this happens, rescan the options.
    if (OptionListChanged) {
      PositionalOpts.clear();
      SinkOpts.clear();
      Opts.clear();
      GetOptionInfo(PositionalOpts, SinkOpts, Opts);
      OptionListChanged = false;
    }

    // Check to see if this is a positional argument.  This argument is
    // considered to be positional if it doesn't start with '-', if it is "-"
    // itself, or if we have seen "--" already.
    //
    if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) {
      // Positional argument!
      if (ActivePositionalArg) {
        ProvidePositionalOption(ActivePositionalArg, argv[i], i);
        continue;  // We are done!
      }

      if (!PositionalOpts.empty()) {
        PositionalVals.push_back(std::make_pair(argv[i],i));

        // All of the positional arguments have been fulfilled; give the rest to
        // the consume after option... if it's specified...
        //
        if (PositionalVals.size() >= NumPositionalRequired &&
            ConsumeAfterOpt != 0) {
          for (++i; i < argc; ++i)
            PositionalVals.push_back(std::make_pair(argv[i],i));
          break;   // Handle outside of the argument processing loop...
        }

        // Delay processing positional arguments until the end...
        continue;
      }
    } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 &&
               !DashDashFound) {
      DashDashFound = true;  // This is the mythical "--"?
      continue;              // Don't try to process it as an argument itself.
    } else if (ActivePositionalArg &&
               (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) {
      // If there is a positional argument eating options, check to see if this
      // option is another positional argument.  If so, treat it as an argument,
      // otherwise feed it to the eating positional.
      ArgName = argv[i]+1;
      // Eat leading dashes.
      while (!ArgName.empty() && ArgName[0] == '-')
        ArgName = ArgName.substr(1);

      Handler = LookupOption(ArgName, Value, Opts);
      if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
        ProvidePositionalOption(ActivePositionalArg, argv[i], i);
        continue;  // We are done!
      }

    } else {     // We start with a '-', must be an argument.
      ArgName = argv[i]+1;
      // Eat leading dashes.
      while (!ArgName.empty() && ArgName[0] == '-')
        ArgName = ArgName.substr(1);

      Handler = LookupOption(ArgName, Value, Opts);

      // Check to see if this "option" is really a prefixed or grouped argument.
      if (Handler == 0)
        Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
                                                ErrorParsing, Opts);
    }

    if (Handler == 0) {
      if (SinkOpts.empty()) {
        errs() << ProgramName << ": Unknown command line argument '"
             << argv[i] << "'.  Try: '" << argv[0] << " -help'\n";
        ErrorParsing = true;
      } else {
        for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
               E = SinkOpts.end(); I != E ; ++I)
          (*I)->addOccurrence(i, "", argv[i]);
      }
      continue;
    }

    // If this is a named positional argument, just remember that it is the
    // active one...
    if (Handler->getFormattingFlag() == cl::Positional)
      ActivePositionalArg = Handler;
    else
      ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i);
  }

  // Check and handle positional arguments now...
  if (NumPositionalRequired > PositionalVals.size()) {
    errs() << ProgramName
         << ": Not enough positional command line arguments specified!\n"
         << "Must specify at least " << NumPositionalRequired
         << " positional arguments: See: " << argv[0] << " -help\n";

    ErrorParsing = true;
  } else if (!HasUnlimitedPositionals &&
             PositionalVals.size() > PositionalOpts.size()) {
    errs() << ProgramName
         << ": Too many positional arguments specified!\n"
         << "Can specify at most " << PositionalOpts.size()
         << " positional arguments: See: " << argv[0] << " -help\n";
    ErrorParsing = true;

  } else if (ConsumeAfterOpt == 0) {
    // Positional args have already been handled if ConsumeAfter is specified.
    unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
    for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
      if (RequiresValue(PositionalOpts[i])) {
        ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first,
                                PositionalVals[ValNo].second);
        ValNo++;
        --NumPositionalRequired;  // We fulfilled our duty...
      }

      // If we _can_ give this option more arguments, do so now, as long as we
      // do not give it values that others need.  'Done' controls whether the
      // option even _WANTS_ any more.
      //
      bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required;
      while (NumVals-ValNo > NumPositionalRequired && !Done) {
        switch (PositionalOpts[i]->getNumOccurrencesFlag()) {
        case cl::Optional:
          Done = true;          // Optional arguments want _at most_ one value
          // FALL THROUGH
        case cl::ZeroOrMore:    // Zero or more will take all they can get...
        case cl::OneOrMore:     // One or more will take all they can get...
          ProvidePositionalOption(PositionalOpts[i],
                                  PositionalVals[ValNo].first,
                                  PositionalVals[ValNo].second);
          ValNo++;
          break;
        default:
          llvm_unreachable("Internal error, unexpected NumOccurrences flag in "
                 "positional argument processing!");
        }
      }
    }
  } else {
    assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size());
    unsigned ValNo = 0;
    for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j)
      if (RequiresValue(PositionalOpts[j])) {
        ErrorParsing |= ProvidePositionalOption(PositionalOpts[j],
                                                PositionalVals[ValNo].first,
                                                PositionalVals[ValNo].second);
        ValNo++;
      }

    // Handle the case where there is just one positional option, and it's
    // optional.  In this case, we want to give JUST THE FIRST option to the
    // positional option and keep the rest for the consume after.  The above
    // loop would have assigned no values to positional options in this case.
    //
    if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) {
      ErrorParsing |= ProvidePositionalOption(PositionalOpts[1],
                                              PositionalVals[ValNo].first,
                                              PositionalVals[ValNo].second);
      ValNo++;
    }

    // Hand all of the rest of the arguments over to the
    // cl::ConsumeAfter command line option...
    for (; ValNo != PositionalVals.size(); ++ValNo)
      ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt,
                                              PositionalVals[ValNo].first,
                                              PositionalVals[ValNo].second);
  }

  // Loop over args and make sure all required args are specified!
  for (StringMap<Option*>::iterator I = Opts.begin(),
         E = Opts.end(); I != E; ++I) {
    switch (I->second->getNumOccurrencesFlag()) {
    case Required:
    case OneOrMore:
      if (I->second->getNumOccurrences() == 0) {
        I->second->error("must be specified at least once!");
        ErrorParsing = true;
      }
      // Fall through
    default:
      break;
    }
  }

  // Free all of the memory allocated to the map.  Command line options may only
  // be processed once!
  Opts.clear();
  PositionalOpts.clear();
  MoreHelp->clear();

  // Free the memory allocated by ExpandResponseFiles.
  if (ReadResponseFiles) {
    // Free all the strdup()ed strings.
    for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
         i != e; ++i)
      free(*i);
  }

  DEBUG(dbgs() << "Args: ";
        for (int i = 0; i < argc; ++i)
          dbgs() << argv[i] << ' ';
        dbgs() << '\n';
       );
}
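The classification rule used by the main loop above is small enough to restate on its own; a sketch, assuming plain C strings as found in argv:

// An argument is positional if it does not start with '-', if it is
// exactly "-", or if a bare "--" has already been seen.
static bool isPositional(const char *Arg, bool DashDashFound) {
  return Arg[0] != '-' || Arg[1] == '\0' || DashDashFound;
}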
Example #5
bool
TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
                                     SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                     const DenseSet<unsigned> &UsedByPhi,
                                     SmallVector<MachineInstr*, 16> &Copies) {
  SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
                                           TailBB->succ_end());
  SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
                                           TailBB->pred_end());
  bool Changed = false;
  for (SmallVectorImpl<MachineBasicBlock *>::iterator PI = Preds.begin(),
       PE = Preds.end(); PI != PE; ++PI) {
    MachineBasicBlock *PredBB = *PI;

    if (PredBB->getLandingPadSuccessor())
      continue;

    if (bothUsedInPHI(*PredBB, Succs))
      continue;

    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
    SmallVector<MachineOperand, 4> PredCond;
    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
      continue;

    Changed = true;
    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
          << "From simple Succ: " << *TailBB);

    MachineBasicBlock *NewTarget = *TailBB->succ_begin();
    MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));

    // Make PredFBB explicit.
    if (PredCond.empty())
      PredFBB = PredTBB;

    // Make fall through explicit.
    if (!PredTBB)
      PredTBB = NextBB;
    if (!PredFBB)
      PredFBB = NextBB;

    // Redirect all branch targets that pointed at TailBB to NewTarget.
    if (PredFBB == TailBB)
      PredFBB = NewTarget;
    if (PredTBB == TailBB)
      PredTBB = NewTarget;

    // Make the branch unconditional if possible.
    if (PredTBB == PredFBB) {
      PredCond.clear();
      PredFBB = NULL;
    }

    // Avoid adding fall through branches.
    if (PredFBB == NextBB)
      PredFBB = NULL;
    if (PredTBB == NextBB && PredFBB == NULL)
      PredTBB = NULL;

    TII->RemoveBranch(*PredBB);

    if (PredTBB)
      TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());

    PredBB->removeSuccessor(TailBB);
    unsigned NumSuccessors = PredBB->succ_size();
    assert(NumSuccessors <= 1);
    if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
      PredBB->addSuccessor(NewTarget);

    TDBBs.push_back(PredBB);
  }
  return Changed;
}
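The branch rewiring above reduces to a handful of pointer rewrites. A distilled sketch, under the simplifying assumption that a branch is fully described by (TBB, FBB, HasCond); the real AnalyzeBranch contract carries more detail:

struct BB;

// Tail is being bypassed in favor of NewTarget; NextBB is PredBB's
// layout successor. The steps mirror the sequence in the loop above.
static void redirectBranch(BB *&TBB, BB *&FBB, bool &HasCond,
                           BB *Tail, BB *NewTarget, BB *NextBB) {
  if (!HasCond)
    FBB = TBB;                  // make the false side explicit
  if (!TBB)
    TBB = NextBB;               // make fall-through explicit
  if (!FBB)
    FBB = NextBB;
  if (FBB == Tail)
    FBB = NewTarget;            // bypass the tail block
  if (TBB == Tail)
    TBB = NewTarget;
  if (TBB == FBB) {             // both sides agree: go unconditional
    HasCond = false;
    FBB = nullptr;
  }
  if (FBB == NextBB)
    FBB = nullptr;              // never emit a branch to the fall-through
  if (TBB == NextBB && !FBB)
    TBB = nullptr;
}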
Example #6
// Test whether it's safe to move Def to just before Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                         AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
  assert(Def->getParent() == Insert->getParent());

  // Check for register dependencies.
  SmallVector<unsigned, 4> MutableRegisters;
  for (const MachineOperand &MO : Def->operands()) {
    if (!MO.isReg() || MO.isUndef())
      continue;
    unsigned Reg = MO.getReg();

    // If the register is dead here and at Insert, ignore it.
    if (MO.isDead() && Insert->definesRegister(Reg) &&
        !Insert->readsRegister(Reg))
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
      // from moving down, and we've already checked for that.
      if (Reg == WebAssembly::ARGUMENTS)
        continue;
      // If the physical register is never modified, ignore it.
      if (!MRI.isPhysRegModified(Reg))
        continue;
      // Otherwise, it's a physical register with unknown liveness.
      return false;
    }

    // If one of the operands isn't in SSA form, it has different values at
    // different times, and we need to make sure we don't move our use across
    // a different def.
    if (!MO.isDef() && !MRI.hasOneDef(Reg))
      MutableRegisters.push_back(Reg);
  }

  bool Read = false, Write = false, Effects = false, StackPointer = false;
  Query(*Def, AA, Read, Write, Effects, StackPointer);

  // If the instruction does not access memory and has no side effects, it has
  // no additional dependencies.
  bool HasMutableRegisters = !MutableRegisters.empty();
  if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
    return true;

  // Scan through the intervening instructions between Def and Insert.
  MachineBasicBlock::const_iterator D(Def), I(Insert);
  for (--I; I != D; --I) {
    bool InterveningRead = false;
    bool InterveningWrite = false;
    bool InterveningEffects = false;
    bool InterveningStackPointer = false;
    Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
          InterveningStackPointer);
    if (Effects && InterveningEffects)
      return false;
    if (Read && InterveningWrite)
      return false;
    if (Write && (InterveningRead || InterveningWrite))
      return false;
    if (StackPointer && InterveningStackPointer)
      return false;

    for (unsigned Reg : MutableRegisters)
      for (const MachineOperand &MO : I->operands())
        if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
          return false;
  }

  return true;
}
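The conflict test applied to every intervening instruction above, restated as a standalone predicate; MemEffects is a hypothetical stand-in for the four flags Query fills in:

// True if moving Def across an instruction with effects Between could
// change behavior; this mirrors the four checks in the scan above.
struct MemEffects {
  bool Read, Write, Effects, StackPointer;
};

static bool conflicts(const MemEffects &Def, const MemEffects &Between) {
  if (Def.Effects && Between.Effects)
    return true;
  if (Def.Read && Between.Write)
    return true;
  if (Def.Write && (Between.Read || Between.Write))
    return true;
  return Def.StackPointer && Between.StackPointer;
}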
Example #7
/// splitLiveRangesAcrossInvokes - Spill each value that is live across an
/// unwind edge into a stack location, guaranteeing that there is nothing live
/// across the unwind edge.  This process also splits all critical edges
/// coming out of invokes.
/// FIXME: Move this function to a common utility file (Local.cpp?) so
/// both SjLj and LowerInvoke can use it.
void SjLjEHPass::
splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
  // First step, split all critical edges from invoke instructions.
  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
    InvokeInst *II = Invokes[i];
    SplitCriticalEdge(II, 0, this);
    SplitCriticalEdge(II, 1, this);
    assert(!isa<PHINode>(II->getNormalDest()) &&
           !isa<PHINode>(II->getUnwindDest()) &&
           "critical edge splitting left single entry phi nodes?");
  }

  Function *F = Invokes.back()->getParent()->getParent();

  // To avoid having to handle incoming arguments specially, we lower each arg
  // to a copy instruction in the entry block.  This ensures that the argument
  // value itself cannot be live across the entry block.
  BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
  while (isa<AllocaInst>(AfterAllocaInsertPt) &&
        isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
    ++AfterAllocaInsertPt;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
       AI != E; ++AI) {
    const Type *Ty = AI->getType();
    // Aggregate types can't be cast, but are legal argument types, so we have
    // to handle them differently. We use an extract/insert pair as a
    // lightweight method to achieve the same goal.
    if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
      Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
      NI->insertAfter(EI);
      AI->replaceAllUsesWith(NI);
      // Set the operand of the instructions back to the AllocaInst.
      EI->setOperand(0, AI);
      NI->setOperand(0, AI);
    } else {
      // This is always a no-op cast because we're casting AI to AI->getType()
      // so src and destination types are identical. BitCast is the only
      // possibility.
      CastInst *NC = new BitCastInst(
        AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
      AI->replaceAllUsesWith(NC);
      // Set the operand of the cast instruction back to the AllocaInst.
      // Normally it's forbidden to replace a CastInst's operand because it
      // could cause the opcode to reflect an illegal conversion. However,
      // we're replacing it here with the same value it was constructed with.
      // We do this because the above replaceAllUsesWith() clobbered the
      // operand, but we want this one to remain.
      NC->setOperand(0, AI);
    }
  }

  // Finally, scan the code looking for instructions with bad live ranges.
  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
      // Ignore obvious cases we don't have to handle.  In particular, most
      // instructions either have no uses or only have a single use inside the
      // current block.  Ignore them quickly.
      Instruction *Inst = II;
      if (Inst->use_empty()) continue;
      if (Inst->hasOneUse() &&
          cast<Instruction>(Inst->use_back())->getParent() == BB &&
          !isa<PHINode>(Inst->use_back())) continue;

      // If this is an alloca in the entry block, it's not a real register
      // value.
      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
        if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
          continue;

      // Avoid iterator invalidation by copying users to a temporary vector.
      SmallVector<Instruction*,16> Users;
      for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
           UI != E; ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        if (User->getParent() != BB || isa<PHINode>(User))
          Users.push_back(User);
      }

      // Find all of the blocks that this value is live in.
      std::set<BasicBlock*> LiveBBs;
      LiveBBs.insert(Inst->getParent());
      while (!Users.empty()) {
        Instruction *U = Users.back();
        Users.pop_back();

        if (!isa<PHINode>(U)) {
          MarkBlocksLiveIn(U->getParent(), LiveBBs);
        } else {
          // Uses for a PHI node occur in their predecessor block.
          PHINode *PN = cast<PHINode>(U);
          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
            if (PN->getIncomingValue(i) == Inst)
              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
        }
      }

      // Now that we know all of the blocks that this thing is live in, see if
      // it includes any of the unwind locations.
      bool NeedsSpill = false;
      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
          NeedsSpill = true;
        }
      }

      // If we decided we need a spill, do it.
      // FIXME: Spilling this way is overkill, as it forces all uses of
      // the value to be reloaded from the stack slot, even those that aren't
      // in the unwind blocks. We should be more selective.
      if (NeedsSpill) {
        ++NumSpilled;
        DemoteRegToStack(*Inst, true);
      }
    }
}
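MarkBlocksLiveIn is not shown here. A plausible sketch of its behavior, inferred from the call sites above; note that LiveBBs is pre-seeded with the defining block, which bounds the backward flood:

#include <cstddef>
#include <set>
#include <vector>

struct Blk {
  std::vector<Blk *> Preds;  // CFG predecessors
};

// Flood backward from a use block; the walk stops at blocks already in
// LiveBBs, so inserting the defining block first keeps it from running
// past the definition.
static void markBlocksLiveIn(Blk *BB, std::set<Blk *> &LiveBBs) {
  if (!LiveBBs.insert(BB).second)
    return;  // already marked; nothing more to do on this path
  for (std::size_t I = 0; I != BB->Preds.size(); ++I)
    markBlocksLiveIn(BB->Preds[I], LiveBBs);
}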
Example #8
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating.  This
  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 64> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  SmallVector<SUnit*, 8> CallSUnits;
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
      if (Visited.insert(NI->getOperand(i).getNode()).second)
        Worklist.push_back(NI->getOperand(i).getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = newSUnit(NI);

    // See if anything is glued to this node; if so, add it to the glued
    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (GlueVal.isOperandOf(*UI)) {
          HasGlueUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    if (NodeSUnit->isCall)
      CallSUnits.push_back(NodeSUnit);

    // Schedule zero-latency TokenFactor below any nodes that may increase the
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
    // have false stalls.
    if (NI->getOpcode() == ISD::TokenFactor)
      NodeSUnit->isScheduleLow = true;

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    computeLatency(NodeSUnit);
  }

  // Find all call operands.
  while (!CallSUnits.empty()) {
    SUnit *SU = CallSUnits.pop_back_val();
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->getOpcode() != ISD::CopyToReg)
        continue;
      SDNode *SrcN = SUNode->getOperand(2).getNode();
      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
      SrcSU->isCallOp = true;
    }
  }
}
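A toy model of the glue grouping above: nodes linked by glue must land in a single SUnit, so tagging a chain from any member must reach both ends. GluePred and GlueSucc are simplified stand-ins for the MVT::Glue operand and result scans:

#include <cstddef>
#include <vector>

struct Node {
  Node *GluePred = nullptr;  // node glued just above this one, if any
  Node *GlueSucc = nullptr;  // node glued just below this one, if any
  int Unit = -1;             // assigned scheduling unit, -1 if none yet
};

static void assignUnits(const std::vector<Node *> &Nodes) {
  int NextUnit = 0;
  for (std::size_t I = 0; I != Nodes.size(); ++I) {
    Node *N = Nodes[I];
    if (N->Unit != -1)
      continue;  // already tagged as part of some chain
    int Unit = NextUnit++;
    // Tag every member of the chain, walking to both ends, just as the
    // glued-pred and glued-succ scans do above.
    for (Node *P = N; P; P = P->GluePred)
      P->Unit = Unit;
    for (Node *S = N->GlueSucc; S; S = S->GlueSucc)
      S->Unit = Unit;
  }
}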
Example #9
/// EmitSchedule - Emit the machine code in scheduled order. Return the new
/// InsertPos and MachineBasicBlock that contains this insertion
/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  InstrEmitter Emitter(BB, InsertPos);
  DenseMap<SDValue, unsigned> VRBaseMap;
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  SmallSet<unsigned, 8> Seen;
  bool HasDbg = DAG->hasDebugValues();

  // If this is the first BB, emit byval parameter dbg_value's.
  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
    for (; PDI != PDE; ++PDI) {
      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
      if (DbgMI)
        BB->insert(InsertPos, DbgMI);
    }
  }

  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      TII->insertNoop(*Emitter.getBlock(), InsertPos);
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any glued SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
      continue;
    }

    SmallVector<SDNode *, 4> GluedNodes;
    for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
      GluedNodes.push_back(N);
    while (!GluedNodes.empty()) {
      SDNode *N = GluedNodes.back();
      Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                       VRBaseMap);
      // Remember the source order of the inserted instruction.
      if (HasDbg)
        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
      GluedNodes.pop_back();
    }
    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                     VRBaseMap);
    // Remember the source order of the inserted instruction.
    if (HasDbg)
      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
                        Seen);
  }

  // Insert all the dbg_values which have not already been inserted in source
  // order sequence.
  if (HasDbg) {
    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();

    // Sort the source order instructions and use the order to insert debug
    // values.
    std::sort(Orders.begin(), Orders.end(), less_first());

    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
    // Now emit the rest according to source order.
    unsigned LastOrder = 0;
    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
      unsigned Order = Orders[i].first;
      MachineInstr *MI = Orders[i].second;
      // Insert all SDDbgValue's whose order(s) are before "Order".
      if (!MI)
        continue;
      for (; DI != DE &&
             (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
        if ((*DI)->isInvalidated())
          continue;
        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(Pos, DbgMI);
          }
        }
      }
      LastOrder = Order;
    }
    // Add trailing DbgValue's before the terminator. FIXME: May want to add
    // some of them before one or more conditional branches?
    SmallVector<MachineInstr*, 8> DbgMIs;
    while (DI != DE) {
      if (!(*DI)->isInvalidated())
        if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
          DbgMIs.push_back(DbgMI);
      ++DI;
    }

    MachineBasicBlock *InsertBB = Emitter.getBlock();
    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
  }

  InsertPos = Emitter.getInsertPos();
  return Emitter.getBlock();
}
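A standalone model of the source-order interleaving above, with plain integers standing in for instructions and SDDbgValues; both inputs are assumed to use the same order numbering:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Each debug value whose order falls in [LastOrder, Order) is emitted
// just before the instruction carrying Order, mirroring the loop above.
// DbgOrders is assumed sorted ascending, as source order implies.
static void interleave(std::vector<unsigned> InstOrders,
                       const std::vector<unsigned> &DbgOrders) {
  std::sort(InstOrders.begin(), InstOrders.end());
  std::size_t DI = 0;
  unsigned LastOrder = 0;
  for (std::size_t I = 0; I != InstOrders.size(); ++I) {
    unsigned Order = InstOrders[I];
    for (; DI != DbgOrders.size() && DbgOrders[DI] >= LastOrder &&
           DbgOrders[DI] < Order; ++DI)
      std::printf("dbg_value(order=%u) before inst(order=%u)\n",
                  DbgOrders[DI], Order);
    LastOrder = Order;
  }
  // Anything left in DbgOrders corresponds to the trailing dbg_values
  // inserted before the terminator above.
}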
Example #10
// LowerCCCCallTo - Function arguments are copied from virtual regs to
// (physical regs)/(stack frame), and CALLSEQ_START and CALLSEQ_END are emitted.
SDValue LanaiTargetLowering::LowerCCCCallTo(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool IsVarArg,
    bool IsTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();

  NumFixedArgs = 0;
  if (IsVarArg && G) {
    const Function *CalleeFn = dyn_cast<Function>(G->getGlobal());
    if (CalleeFn)
      NumFixedArgs = CalleeFn->getFunctionType()->getNumParams();
  }
  if (NumFixedArgs)
    CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_VarArg);
  else {
    if (CallConv == CallingConv::Fast)
      CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_Fast);
    else
      CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32);
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
    ISD::ArgFlagsTy Flags = Outs[I].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[I];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MFI->CreateStackObject(Size, Align, false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          /*IsTailCall=*/false, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  Chain = DAG.getCALLSEQ_START(
      Chain,
      DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true),
      DL);

  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
  SmallVector<SDValue, 12> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned I = 0, J = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue Arg = OutVals[I];
    ISD::ArgFlagsTy Flags = Outs[I].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      Arg = ByValArgs[J++];

    // Arguments that can be passed in a register must be kept in the
    // RegsToPass vector.
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());

      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
                                      getPointerTy(DAG.getDataLayout()));

      SDValue PtrOff =
          DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      MemOpChains.push_back(DAG.getStore(
          Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, false, 0));
    }
  }

  // Transform all store nodes into one single node because all store nodes are
  // independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        ArrayRef<SDValue>(&MemOpChains[0], MemOpChains.size()));

  SDValue InFlag;

  // Build a sequence of copy-to-reg nodes chained together with token chain and
  // flag operands which copy the outgoing args into registers.  The InFlag is
  // necessary since all emitted instructions must be stuck together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  uint8_t OpFlag = LanaiII::MO_NO_FLAG;
  if (G) {
    Callee = DAG.getTargetGlobalAddress(
        G->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), 0, OpFlag);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(
        E->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlag);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add a register mask operand representing the call-preserved registers.
  // TODO: Should return-twice functions be handled?
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(LanaiISD::CALL, DL, NodeTys,
                      ArrayRef<SDValue>(&Ops[0], Ops.size()));
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true),
      DAG.getConstant(0, DL, getPointerTy(DAG.getDataLayout()), true), InFlag,
      DL);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals);
}
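The walk over ArgLocs above is, at its core, a partition into register copies and stack stores. A toy sketch, with Loc as a hypothetical stand-in for CCValAssign and plain ints for argument values:

#include <cstddef>
#include <utility>
#include <vector>

// Loc models an assigned location: either a register number or a
// stack offset relative to SP.
struct Loc {
  bool InReg;
  unsigned RegOrOffset;
};

static void splitArgs(const std::vector<std::pair<Loc, int> > &Args,
                      std::vector<std::pair<unsigned, int> > &RegsToPass,
                      std::vector<std::pair<unsigned, int> > &StackStores) {
  for (std::size_t I = 0; I != Args.size(); ++I) {
    const Loc &L = Args[I].first;
    int Val = Args[I].second;
    if (L.InReg)
      RegsToPass.push_back(std::make_pair(L.RegOrOffset, Val));   // copy-to-reg before the call
    else
      StackStores.push_back(std::make_pair(L.RegOrOffset, Val));  // store inside the CALLSEQ bracket
  }
}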
Example #11
static bool mergeConstants(Module &M) {
  // Find all the globals that are marked "used".  These cannot be merged.
  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);

  // Map unique constants to globals.
  DenseMap<Constant *, GlobalVariable *> CMap;

  // Replacements - This vector contains a list of replacements to perform.
  SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;

  bool MadeChange = false;

  // XXX EMSCRIPTEN: mark @__init_array_start as not to be touched
  const GlobalValue *InitArrayStart = M.getNamedGlobal("__init_array_start");

  // Iterate constant merging while we are still making progress.  Merging two
  // constants together may allow us to merge other constants together if the
  // second level constants have initializers which point to the globals that
  // were just merged.
  while (1) {

    // First: Find the canonical constants others will be merged with.
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = &*GVI++;

      // XXX EMSCRIPTEN: mark @__init_array_start as not to be touched
      if (GV == InitArrayStart)
        continue;

      // If this GV is dead, remove it.
      GV->removeDeadConstantUsers();
      if (GV->use_empty() && GV->hasLocalLinkage()) {
        GV->eraseFromParent();
        continue;
      }

      // Only process constants with initializers in the default address space.
      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
          // Don't touch values marked with attribute(used).
          UsedGlobals.count(GV))
        continue;

      // This transformation is legal for weak ODR globals in the sense it
      // doesn't change semantics, but we really don't want to perform it
      // anyway; it's likely to pessimize code generation, and some tools
      // (like the Darwin linker in cases involving CFString) don't expect it.
      if (GV->isWeakForLinker())
        continue;

      Constant *Init = GV->getInitializer();

      // Check to see if the initializer is already known.
      GlobalVariable *&Slot = CMap[Init];

      // If this is the first constant we find or if the old one is local,
      // replace with the current one. If the current one is externally visible
      // it cannot be replaced, but it can be the canonical constant we merge with.
      if (!Slot || IsBetterCanonical(*GV, *Slot))
        Slot = GV;
    }

    // Second: identify all globals that can be merged together, filling in
    // the Replacements vector.  We cannot do the replacement in this pass
    // because doing so may cause initializers of other globals to be rewritten,
    // invalidating the Constant* pointers in CMap.
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = &*GVI++;

      // Only process constants with initializers in the default address space.
      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
          // Don't touch values marked with attribute(used).
          UsedGlobals.count(GV))
        continue;

      // We can only replace constants with local linkage.
      if (!GV->hasLocalLinkage())
        continue;

      Constant *Init = GV->getInitializer();

      // Check to see if the initializer is already known.
      GlobalVariable *Slot = CMap[Init];

      if (!Slot || Slot == GV)
        continue;

      if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
        continue;

      if (!GV->hasGlobalUnnamedAddr())
        Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);

      // Make all uses of the duplicate constant use the canonical version.
      Replacements.push_back(std::make_pair(GV, Slot));
    }

    if (Replacements.empty())
      return MadeChange;
    CMap.clear();

    // Now that we have figured out which replacements must be made, do them all
    // now.  This avoids invalidating the pointers in CMap, which are no longer
    // needed.
    for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
      // Bump the alignment if necessary.
      if (Replacements[i].first->getAlignment() ||
          Replacements[i].second->getAlignment()) {
        Replacements[i].second->setAlignment(
            std::max(getAlignment(Replacements[i].first),
                     getAlignment(Replacements[i].second)));
      }

      // Eliminate any uses of the dead global.
      Replacements[i].first->replaceAllUsesWith(Replacements[i].second);

      // Delete the global value from the module.
      assert(Replacements[i].first->hasLocalLinkage() &&
             "Refusing to delete an externally visible global variable.");
      Replacements[i].first->eraseFromParent();
    }

    NumMerged += Replacements.size();
    Replacements.clear();
  }
}
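// Minimal sketches of the helpers referenced above, under the assumption that
// they follow the usual ConstantMerge shape; the real definitions are not part
// of this listing.

// Collect the values appended to @llvm.used / @llvm.compiler.used, stripping
// the pointer casts the front-end inserts around each entry.
static void FindUsedValues(GlobalVariable *LLVMUsed,
                           SmallPtrSetImpl<const GlobalValue *> &UsedValues) {
  if (!LLVMUsed) return;
  const ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
  for (unsigned I = 0, E = Inits->getNumOperands(); I != E; ++I) {
    Value *Operand = Inits->getOperand(I)->stripPointerCasts();
    UsedValues.insert(cast<GlobalValue>(Operand));
  }
}

// Prefer an externally visible global as the canonical copy; among equals,
// prefer one that already has unnamed_addr.
static bool IsBetterCanonical(const GlobalVariable &A,
                              const GlobalVariable &B) {
  if (!A.hasLocalLinkage() && B.hasLocalLinkage())
    return true;
  if (A.hasLocalLinkage() && !B.hasLocalLinkage())
    return false;
  return A.hasGlobalUnnamedAddr();
}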
/// isSafeToPromoteArgument - As you might guess from the name of this method,
/// it checks to see if it is both safe and useful to promote the argument.
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
  typedef std::set<IndicesVector> GEPIndicesSet;

  // Quick exit for unused arguments
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of the uses are loads, or are GEP
  // instructions (with constant indices) that are subsequently loaded.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (i.e., there is a load in the
  // entry block) or the pointer passed in at every call site is guaranteed to
  // be valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
  //
  // This set will contain all sets of indices that are loaded in the entry
  // block, and thus are safe to unconditionally load in the caller.
  GEPIndicesSet SafeToUnconditionallyLoad;

  // This set contains all the sets of indices that we are planning to promote.
  // This makes it possible to limit the number of arguments added.
  GEPIndicesSet ToPromote;

  // If the pointer is always valid, any load with first index 0 is valid.
  if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));

  // First, iterate the entry block and mark loads of (geps of) arguments as
  // safe.
  BasicBlock *EntryBlock = Arg->getParent()->begin();
  // Declare this here so we can reuse it
  IndicesVector Indices;
  for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
       I != E; ++I)
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      Value *V = LI->getPointerOperand();
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
        V = GEP->getPointerOperand();
        if (V == Arg) {
          // This load actually loads (part of) Arg? Check the indices then.
          Indices.reserve(GEP->getNumIndices());
          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
               II != IE; ++II)
            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
              Indices.push_back(CI->getSExtValue());
            else
              // We found a non-constant GEP index for this argument? Bail out
              // right away, can't promote this argument at all.
              return false;

          // Indices checked out, mark them as safe
          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
          Indices.clear();
        }
      } else if (V == Arg) {
        // Direct loads are equivalent to a GEP with a single 0 index.
        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
      }
    }

  // Now, iterate all uses of the argument to see if there are any uses that are
  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
  SmallVector<LoadInst*, 16> Loads;
  IndicesVector Operands;
  for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    Operands.clear();
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (LI->isVolatile()) return false;  // Don't hack volatile loads
      Loads.push_back(LI);
      // Direct loads are equivalent to a GEP with a zero index and then a load.
      Operands.push_back(0);
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      if (GEP->use_empty()) {
        // Dead GEP's cause trouble later.  Just remove them if we run into
        // them.
        getAnalysis<AliasAnalysis>().deleteValue(GEP);
        GEP->eraseFromParent();
        // TODO: This reruns the above loop from scratch for every dead GEP.
        // Couldn't we just increment the UI iterator earlier and erase the
        // use?
        return isSafeToPromoteArgument(Arg, isByVal);
      }

      // Ensure that all of the indices are constants.
      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
        i != e; ++i)
        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
          Operands.push_back(C->getSExtValue());
        else
          return false;  // Not a constant operand GEP!

      // Ensure that the only users of the GEP are load instructions.
      for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
           UI != E; ++UI)
        if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
          if (LI->isVolatile()) return false;  // Don't hack volatile loads
          Loads.push_back(LI);
        } else {
          // Other uses than load?
          return false;
        }
    } else {
      return false;  // Not a load or a GEP.
    }

    // Now, see if it is safe to promote this load / loads of this GEP. Loading
    // is safe if Operands, or a prefix of Operands, is marked as safe.
    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
      return false;

    // See if we are already promoting a load with these indices. If not, check
    // to make sure that we aren't promoting too many elements.  If so, nothing
    // to do.
    if (ToPromote.find(Operands) == ToPromote.end()) {
      if (maxElements > 0 && ToPromote.size() == maxElements) {
        DEBUG(dbgs() << "argpromotion not promoting argument '"
              << Arg->getName() << "' because it would require adding more "
              << "than " << maxElements << " arguments to the function.\n");
        // We limit aggregate promotion to only promoting up to a fixed number
        // of elements of the aggregate.
        return false;
      }
      ToPromote.insert(Operands);
    }
  }

  if (Loads.empty()) return true;  // No users, this is a dead argument.

  // Okay, now we know that the argument is only used by load instructions and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be many load instructions, remember which
  // blocks we already know to be transparent to the load.
  SmallPtrSet<BasicBlock*, 16> TranspBlocks;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    LoadInst *Load = Loads[i];
    BasicBlock *BB = Load->getParent();

    AliasAnalysis::Location Loc = AA.getLocation(Load);
    if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
      return false;  // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;
      for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
             I = idf_ext_begin(P, TranspBlocks),
             E = idf_ext_end(P, TranspBlocks); I != E; ++I)
        if (AA.canBasicBlockModify(**I, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}
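// Minimal sketches of the index-set helpers used above (the originals are not
// in this listing). PrefixIn exploits std::set's lexicographic ordering, and
// assumes MarkIndicesSafe keeps the set prefix-free, so the last element not
// greater than Indices is the only candidate prefix.
static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  if (Prefix.size() > Longer.size())
    return false;
  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}

// Returns true if Indices, or some prefix of Indices, is in Set.
static bool PrefixIn(const IndicesVector &Indices,
                     std::set<IndicesVector> &Set) {
  std::set<IndicesVector>::iterator Low = Set.upper_bound(Indices);
  if (Low != Set.begin())
    --Low;
  // Low now points at the last element smaller than or equal to Indices,
  // which is the only element that can be a prefix of Indices.
  return Low != Set.end() && IsPrefix(*Low, Indices);
}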
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct).  If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return 0;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++ArgNo)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
  if (PointerArgs.empty()) return 0;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.
  if (F->hasAddressTaken())
    return 0;

  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0; i != PointerArgs.size(); ++i) {
    bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe.
    Argument *PtrArg = PointerArgs[i].first;
    if (isByVal) {
      const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
        } else {
          // If all the elements are single-value types, we can promote it.
          bool AllSimple = true;
          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
            if (!STy->getElementType(i)->isSingleValueType()) {
              AllSimple = false;
              break;
            }

          // Safe to transform, don't even bother trying to "promote" it.
          // Passing the elements as a scalar will allow scalarrepl to hack on
          // the new alloca we introduce.
          if (AllSimple) {
            ByValArgsToTransform.insert(PtrArg);
            continue;
          }
        }
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, isByVal))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
    return 0;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
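// For illustration only (hand-written IR, not from the original sources): a
// pointer argument that is only loaded is rewritten so the load happens at
// each call site instead.
//
// Before:
//   define internal i32 @f(i32* %p) {
//     %v = load i32* %p
//     ret i32 %v
//   }
//   %r = call i32 @f(i32* %q)
//
// After:
//   define internal i32 @f(i32 %p.val) {
//     ret i32 %p.val
//   }
//   %q.val = load i32* %q
//   %r = call i32 @f(i32 %q.val)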
Example #14
/// Remove dead functions that are not included in the DNR (Do Not Remove) list.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode*, 16> FunctionsToRemove;

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program.  Insert the dead ones in the FunctionsToRemove set.
  for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
    CallGraphNode *CGN = I->second;
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage() && F->hasComdat())
      continue;
    
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Remove the node for the callee from the call graph and delete it.
    FunctionsToRemove.push_back(CGN);
  }
  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so.  We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this; the order in which the functions are deleted is
  // irrelevant.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
                                      FunctionsToRemove.end()),
                          FunctionsToRemove.end());
  for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
                                                  E = FunctionsToRemove.end();
       I != E; ++I) {
    delete CG.removeFunctionFromModule(*I);
    ++NumDeleted;
  }
  return true;
}
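// A plausible call site for the routine above (assumed; the original wiring is
// not shown in this listing): the inliner drops dead functions once, when the
// whole pass finishes.
bool Inliner::doFinalization(CallGraph &CG) {
  return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/false);
}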
Example #15
/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
                          const SmallPtrSet<Argument *, 8> &SCCNodes) {

  SmallVector<Use *, 32> Worklist;
  SmallSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr())
    return Attribute::None;

  bool IsRead = false;
  // We don't need to track IsWritten. If A is written to, return immediately.

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      bool Captures = true;

      if (I->getType()->isVoidTy())
        Captures = false;

      auto AddUsersToWorklistIfCapturing = [&] {
        if (Captures)
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      };

      CallSite CS(I);
      if (CS.doesNotAccessMemory()) {
        AddUsersToWorklistIfCapturing();
        continue;
      }

      Function *F = CS.getCalledFunction();
      if (!F) {
        if (CS.onlyReadsMemory()) {
          IsRead = true;
          AddUsersToWorklistIfCapturing();
          continue;
        }
        return Attribute::None;
      }

      // Note: the callee and the two successor blocks *follow* the argument
      // operands.  This means there is no need to adjust UseIndex to account
      // for these.

      unsigned UseIndex = std::distance(CS.arg_begin(), U);

      // U cannot be the callee operand use: since we're exploring the
      // transitive uses of an Argument, having such a use be a callee would
      // imply the CallSite is an indirect call or invoke; and we'd take the
      // early exit above.
      assert(UseIndex < CS.data_operands_size() &&
             "Data operand use expected!");

      bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands();

      if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
        assert(F->isVarArg() && "More params than args in non-varargs call");
        return Attribute::None;
      }

      Captures &= !CS.doesNotCapture(UseIndex);

      // Since the optimizer (by design) cannot see the data flow corresponding
      // to an operand bundle use, these cannot participate in the optimistic SCC
      // analysis.  Instead, we model the operand bundle uses as arguments in a
      // call to a function external to the SCC.
      if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) ||
          IsOperandBundleUse) {

        // The accessors used on CallSite here do the right thing for calls and
        // invokes with operand bundles.

        if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex))
          return Attribute::None;
        if (!CS.doesNotAccessMemory(UseIndex))
          IsRead = true;
      }

      AddUsersToWorklistIfCapturing();
      break;
    }

    case Instruction::Load:
      IsRead = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}
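// Hypothetical caller sketch (name and surrounding machinery assumed): a
// FunctionAttrs-style pass would apply the computed kind to the argument
// roughly like this. Argument attribute indices are offset by one because
// index 0 refers to the return value.
static bool applyReadAttr(Argument *A, Attribute::AttrKind R) {
  if (R != Attribute::ReadOnly && R != Attribute::ReadNone)
    return false;
  A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
  return true;
}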
Example #16
void SILGenFunction::emitCaptures(SILLocation loc,
                                  AnyFunctionRef closure,
                                  CaptureEmission purpose,
                                  SmallVectorImpl<ManagedValue> &capturedArgs) {
  auto captureInfo = SGM.Types.getLoweredLocalCaptures(closure);
  // For boxed captures, we need to mark the contained variables as having
  // escaped for DI diagnostics.
  SmallVector<SILValue, 2> escapesToMark;
  
  // Partial applications take ownership of the context parameters, so we'll
  // need to pass ownership rather than merely guaranteeing parameters.
  bool canGuarantee;
  switch (purpose) {
  case CaptureEmission::PartialApplication:
    canGuarantee = false;
    break;
  case CaptureEmission::ImmediateApplication:
    canGuarantee = true;
    break;
  }
  
  for (auto capture : captureInfo.getCaptures()) {
    if (capture.isDynamicSelfMetadata()) {
      // The parameter type is the static Self type, but the value we
      // want to pass is the dynamic Self type, so upcast it.
      auto dynamicSelfMetatype = MetatypeType::get(
        captureInfo.getDynamicSelfType());
      SILType dynamicSILType = getLoweredType(dynamicSelfMetatype);

      SILValue value = B.createMetatype(loc, dynamicSILType);
      capturedArgs.push_back(ManagedValue::forUnmanaged(value));
      continue;
    }

    auto *vd = capture.getDecl();

    switch (SGM.Types.getDeclCaptureKind(capture)) {
    case CaptureKind::None:
      break;

    case CaptureKind::Constant: {
      // let declarations.
      auto found = VarLocs.find(vd);
      assert(found != VarLocs.end());
      auto Entry = found->second;

      auto *var = cast<VarDecl>(vd);
      auto &tl = getTypeLowering(var->getType()->getReferenceStorageReferent());
      SILValue Val = Entry.value;

      if (!Val->getType().isAddress()) {
        // Our 'let' binding can guarantee the lifetime for the callee,
        // if we don't need to do anything more to it.
        if (canGuarantee && !var->getType()->is<ReferenceStorageType>()) {
          auto guaranteed = ManagedValue::forUnmanaged(Val).borrow(*this, loc);
          capturedArgs.push_back(guaranteed);
          break;
        }
      
        // Just retain a by-val let.
        Val = B.emitCopyValueOperation(loc, Val);
      } else {
        // If we have a mutable binding for a 'let', such as 'self' in an
        // 'init' method, load it.
        Val = emitLoad(loc, Val, tl, SGFContext(), IsNotTake).forward(*this);
      }

      // If we're capturing an unowned pointer by value, we will have just
      // loaded it into a normal retained class pointer, but we capture it as
      // an unowned pointer.  Convert back now.
      if (var->getType()->is<ReferenceStorageType>()) {
        auto type = getLoweredType(var->getType());
        Val = emitConversionFromSemanticValue(loc, Val, type);
      }

      capturedArgs.push_back(emitManagedRValueWithCleanup(Val));
      break;
    }

    case CaptureKind::StorageAddress: {
      // Non-escaping stored declarations are captured as the
      // address of the value.
      assert(VarLocs.count(vd) && "no location for captured var!");
      VarLoc vl = VarLocs[vd];
      assert(vl.value->getType().isAddress() && "no address for captured var!");
      capturedArgs.push_back(ManagedValue::forLValue(vl.value));
      break;
    }

    case CaptureKind::Box: {
      // LValues are captured as both the box owning the value and the
      // address of the value.
      assert(VarLocs.count(vd) && "no location for captured var!");
      VarLoc vl = VarLocs[vd];
      assert(vl.value->getType().isAddress() && "no address for captured var!");

      // If this is a boxed variable, we can use it directly.
      if (vl.box) {
        // We can guarantee our own box to the callee.
        if (canGuarantee) {
          capturedArgs.push_back(
              ManagedValue::forUnmanaged(vl.box).borrow(*this, loc));
        } else {
          capturedArgs.push_back(emitManagedRetain(loc, vl.box));
        }
        escapesToMark.push_back(vl.value);
      } else {
        // Address-only 'let' values are passed by box.  This isn't great, in
        // that a variable captured by multiple closures will be boxed for each
        // one.  This could be improved by doing an "isCaptured" analysis when
        // emitting address-only let constants, and emitting them into an
        // alloc_box like a variable instead of into an alloc_stack.
        //
        // TODO: This might not be profitable anymore with guaranteed captures,
        // since we could conceivably forward the copied value into the
        // closure context and pass it down to the partially applied function
        // in-place.
        // TODO: Use immutable box for immutable captures.
        auto boxTy = SGM.Types.getContextBoxTypeForCapture(vd,
                                       vl.value->getType().getASTType(),
                                       F.getGenericEnvironment(),
                                       /*mutable*/ true);
        
        AllocBoxInst *allocBox = B.createAllocBox(loc, boxTy);
        ProjectBoxInst *boxAddress = B.createProjectBox(loc, allocBox, 0);
        B.createCopyAddr(loc, vl.value, boxAddress, IsNotTake,
                         IsInitialization);
        if (canGuarantee)
          capturedArgs.push_back(
              emitManagedRValueWithCleanup(allocBox).borrow(*this, loc));
        else
          capturedArgs.push_back(emitManagedRValueWithCleanup(allocBox));
      }

      break;
    }
    }
  }
  
  // Mark box addresses as captured for DI purposes. The values must have
  // been fully initialized before we close over them.
  if (!escapesToMark.empty()) {
    B.createMarkFunctionEscape(loc, escapesToMark);
  }
}
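// Hypothetical caller sketch (names and surrounding machinery assumed): when
// forming a closure value, SILGen first gathers the captures and then forwards
// them as the trailing arguments of the partial application.
static void forwardCapturesForClosure(SILGenFunction &SGF, SILLocation loc,
                                      AnyFunctionRef closure,
                                      SmallVectorImpl<SILValue> &forwardedArgs) {
  SmallVector<ManagedValue, 4> capturedArgs;
  SGF.emitCaptures(loc, closure, CaptureEmission::PartialApplication,
                   capturedArgs);
  for (ManagedValue capture : capturedArgs)
    forwardedArgs.push_back(capture.forward(SGF)); // ownership moves to the call
  // forwardedArgs then become the context arguments of the partial_apply that
  // forms the closure value.
}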
Example #17
void StackAllocationPromoter::promoteAllocationToPhi() {
  DEBUG(llvm::dbgs() << "*** Placing Phis for : " << *ASI);

  // A list of blocks that will require new Phi values.
  BlockSet PhiBlocks;

  // The "piggy-bank" data-structure that we use for processing the dom-tree
  // bottom-up.
  NodePriorityQueue PQ;

  // Collect all of the stores into the AllocStack. We know that at this point
  // we have at most one store per block.
  for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E; ++UI) {
    SILInstruction *II = UI->getUser();
    // We need to place Phis for this block.
    if (isa<StoreInst>(II)) {
      // If the block is in the dom tree (dominated by the entry block).
      if (DomTreeNode *Node = DT->getNode(II->getParent()))
        PQ.push(std::make_pair(Node, DomTreeLevels[Node]));
    }
  }

  DEBUG(llvm::dbgs() << "*** Found: " << PQ.size() << " Defs\n");

  // A list of nodes for which we already calculated the dominator frontier.
  llvm::SmallPtrSet<DomTreeNode *, 32> Visited;

  SmallVector<DomTreeNode *, 32> Worklist;

  // Scan all of the definitions in the function bottom-up using the priority
  // queue.
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dom tree children of Root, inspecting their successors. Only
    // J-edges whose target level is at most Root's level are added to the
    // dominance frontier.
    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      SILBasicBlock *BB = Node->getBlock();

      // For all successors of the node:
      for (auto &Succ : BB->getSuccessors()) {
        DomTreeNode *SuccNode = DT->getNode(Succ);

        // Skip D-edges (edges that are dom-tree edges).
        if (SuccNode->getIDom() == Node)
          continue;

        // Ignore J-edges whose target level is greater than the root level.
        unsigned SuccLevel = DomTreeLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        // Ignore visited nodes.
        if (!Visited.insert(SuccNode).second)
          continue;

        // If the new PHINode is not dominated by the allocation then it's dead.
        if (!DT->dominates(ASI->getParent(), SuccNode->getBlock()))
          continue;

        // If the new PHINode is properly dominated by the deallocation then it
        // is obviously a dead PHINode, so we don't need to insert it.
        if (DSI && DT->properlyDominates(DSI->getParent(),
                                         SuccNode->getBlock()))
          continue;

        // The successor block gets a new PHINode. If this PHI node is new,
        // it may require additional definitions, so add it to the PQ.
        if (PhiBlocks.insert(Succ).second)
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      // Add the children in the dom-tree to the worklist.
      for (auto CI = Node->begin(), CE = Node->end(); CI != CE; ++CI)
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
    }
  }

  DEBUG(llvm::dbgs() << "*** Found: " << PhiBlocks.size() << " new PHIs\n");
  NumPhiPlaced += PhiBlocks.size();

  // At this point we calculated the locations of all of the new Phi values.
  // Next, add the Phi values and promote all of the loads and stores into the
  // new locations.

  // Replace the dummy values with new block arguments.
  addBlockArguments(PhiBlocks);

  // Hook up the Phi nodes, loads, and debug_value_addr with incoming values.
  fixBranchesAndUses(PhiBlocks);

  DEBUG(llvm::dbgs() << "*** Finished placing Phis ***\n");
}
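// A minimal sketch (helper name and exact types assumed) of how the
// DomTreeLevels map used above can be built: a simple walk over the dominator
// tree that records each node's depth.
static void computeDomTreeLevels(DominanceInfo *DT,
                                 llvm::DenseMap<DomTreeNode *, unsigned>
                                     &DomTreeLevels) {
  SmallVector<DomTreeNode *, 32> Worklist;
  DomTreeNode *Root = DT->getRootNode();
  DomTreeLevels[Root] = 0;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    DomTreeNode *Node = Worklist.pop_back_val();
    unsigned ChildLevel = DomTreeLevels[Node] + 1;
    for (auto CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) {
      DomTreeLevels[*CI] = ChildLevel;
      Worklist.push_back(*CI);
    }
  }
}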
Example #18
void SILGenFunction::emitArtificialTopLevel(ClassDecl *mainClass) {
  // Load argc and argv from the entry point arguments.
  SILValue argc = F.begin()->getArgument(0);
  SILValue argv = F.begin()->getArgument(1);

  switch (mainClass->getArtificialMainKind()) {
  case ArtificialMainKind::UIApplicationMain: {
    // Emit a UIKit main.
    // return UIApplicationMain(C_ARGC, C_ARGV, nil, ClassName);

    CanType NSStringTy = SGM.Types.getNSStringType();
    CanType OptNSStringTy
      = OptionalType::get(NSStringTy)->getCanonicalType();

    // Look up UIApplicationMain.
    // FIXME: Doing an AST lookup here is gross and not entirely sound;
    // we're getting away with it because the types are guaranteed to already
    // be imported.
    ASTContext &ctx = getASTContext();
    ModuleDecl *UIKit = ctx.getLoadedModule(ctx.getIdentifier("UIKit"));
    SmallVector<ValueDecl *, 1> results;
    UIKit->lookupQualified(UIKit->getInterfaceType(),
                           ctx.getIdentifier("UIApplicationMain"),
                           NL_QualifiedDefault,
                           /*resolver*/nullptr,
                           results);
    assert(!results.empty() && "couldn't find UIApplicationMain in UIKit");
    assert(results.size() == 1 && "more than one UIApplicationMain?");

    auto mainRef = SILDeclRef(results.front()).asForeign();
    auto UIApplicationMainFn = SGM.M.getOrCreateFunction(mainClass, mainRef,
                                                         NotForDefinition);
    auto fnTy = UIApplicationMainFn->getLoweredFunctionType();
    SILFunctionConventions fnConv(fnTy, SGM.M);

    // Get the class name as a string using NSStringFromClass.
    CanType mainClassTy = mainClass->getDeclaredInterfaceType()
        ->getCanonicalType();
    CanType mainClassMetaty = CanMetatypeType::get(mainClassTy,
                                                   MetatypeRepresentation::ObjC);
    CanType anyObjectTy = ctx.getAnyObjectType();
    CanType anyObjectMetaTy = CanExistentialMetatypeType::get(anyObjectTy,
                                                  MetatypeRepresentation::ObjC);

    auto NSStringFromClassType = SILFunctionType::get(nullptr,
                  SILFunctionType::ExtInfo()
                    .withRepresentation(SILFunctionType::Representation::
                                        CFunctionPointer),
                  SILCoroutineKind::None,
                  ParameterConvention::Direct_Unowned,
                  SILParameterInfo(anyObjectMetaTy,
                                   ParameterConvention::Direct_Unowned),
                  /*yields*/ {},
                  SILResultInfo(OptNSStringTy,
                                ResultConvention::Autoreleased),
                  /*error result*/ None,
                  ctx);
    auto NSStringFromClassFn
      = SGM.M.getOrCreateFunction(mainClass, "NSStringFromClass",
                                  SILLinkage::PublicExternal,
                                  NSStringFromClassType,
                                  IsBare, IsTransparent, IsNotSerialized);
    auto NSStringFromClass = B.createFunctionRef(mainClass, NSStringFromClassFn);
    SILValue metaTy = B.createMetatype(mainClass,
                             SILType::getPrimitiveObjectType(mainClassMetaty));
    metaTy = B.createInitExistentialMetatype(mainClass, metaTy,
                          SILType::getPrimitiveObjectType(anyObjectMetaTy), {});
    SILValue optName = B.createApply(mainClass,
                               NSStringFromClass,
                               NSStringFromClass->getType(),
                               SILType::getPrimitiveObjectType(OptNSStringTy),
                               {}, metaTy);

    // Fix up the string parameters to have the right type.
    SILType nameArgTy = fnConv.getSILArgumentType(3);
    assert(nameArgTy == fnConv.getSILArgumentType(2));
    auto managedName = ManagedValue::forUnmanaged(optName);
    SILValue nilValue;
    assert(optName->getType() == nameArgTy);
    nilValue = getOptionalNoneValue(mainClass,
                                    getTypeLowering(OptNSStringTy));

    // Fix up argv to have the right type.
    auto argvTy = fnConv.getSILArgumentType(1);

    SILType unwrappedTy = argvTy;
    if (Type innerTy = argvTy.getASTType()->getOptionalObjectType()) {
      auto canInnerTy = innerTy->getCanonicalType();
      unwrappedTy = SILType::getPrimitiveObjectType(canInnerTy);
    }

    auto managedArgv = ManagedValue::forUnmanaged(argv);

    if (unwrappedTy != argv->getType()) {
      auto converted =
          emitPointerToPointer(mainClass, managedArgv,
                               argv->getType().getASTType(),
                               unwrappedTy.getASTType());
      managedArgv = std::move(converted).getAsSingleValue(*this, mainClass);
    }

    if (unwrappedTy != argvTy) {
      managedArgv = getOptionalSomeValue(mainClass, managedArgv,
                                         getTypeLowering(argvTy));
    }

    auto UIApplicationMain = B.createFunctionRef(mainClass, UIApplicationMainFn);

    SILValue args[] = {argc, managedArgv.getValue(), nilValue,
                       managedName.getValue()};

    B.createApply(mainClass, UIApplicationMain,
                  UIApplicationMain->getType(),
                  argc->getType(), {}, args);
    SILValue r = B.createIntegerLiteral(mainClass,
                        SILType::getBuiltinIntegerType(32, ctx), 0);
    auto rType = F.getConventions().getSingleSILResultType();
    if (r->getType() != rType)
      r = B.createStruct(mainClass, rType, r);

    Cleanups.emitCleanupsForReturn(mainClass);
    B.createReturn(mainClass, r);
    return;
  }

  case ArtificialMainKind::NSApplicationMain: {
    // Emit an AppKit main.
    // return NSApplicationMain(C_ARGC, C_ARGV);

    SILParameterInfo argTypes[] = {
      SILParameterInfo(argc->getType().getASTType(),
                       ParameterConvention::Direct_Unowned),
      SILParameterInfo(argv->getType().getASTType(),
                       ParameterConvention::Direct_Unowned),
    };
    auto NSApplicationMainType = SILFunctionType::get(nullptr,
                  SILFunctionType::ExtInfo()
                    // Should be C calling convention, but NSApplicationMain
                    // has an overlay to fix the type of argv.
                    .withRepresentation(SILFunctionType::Representation::Thin),
                  SILCoroutineKind::None,
                  ParameterConvention::Direct_Unowned,
                  argTypes,
                  /*yields*/ {},
                  SILResultInfo(argc->getType().getASTType(),
                                ResultConvention::Unowned),
                  /*error result*/ None,
                  getASTContext());

    auto NSApplicationMainFn
      = SGM.M.getOrCreateFunction(mainClass, "NSApplicationMain",
                                  SILLinkage::PublicExternal,
                                  NSApplicationMainType,
                                  IsBare, IsTransparent, IsNotSerialized);

    auto NSApplicationMain = B.createFunctionRef(mainClass, NSApplicationMainFn);
    SILValue args[] = { argc, argv };

    B.createApply(mainClass, NSApplicationMain,
                  NSApplicationMain->getType(),
                  argc->getType(), {}, args);
    SILValue r = B.createIntegerLiteral(mainClass,
                        SILType::getBuiltinIntegerType(32, getASTContext()), 0);
    auto rType = F.getConventions().getSingleSILResultType();
    if (r->getType() != rType)
      r = B.createStruct(mainClass, rType, r);
    B.createReturn(mainClass, r);
    return;
  }
  }
}
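// At the source level, the UIApplicationMain case above corresponds roughly to
// (an illustration, not emitted text):
//
//   return UIApplicationMain(argc, argv, nil, NSStringFromClass(MainClass))
//
// with argv converted, if necessary, to the pointer type the imported
// declaration expects.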
Example #19
SDValue
Cpu0TargetLowering::LowerCall(SDValue InChain, SDValue Callee,
                              CallingConv::ID CallConv, bool isVarArg,
                              bool doesNotRet, bool &isTailCall,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              DebugLoc dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &InVals) const {
  // Cpu0 target does not yet support tail call optimization.
  isTailCall = false;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
  bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
  Cpu0FunctionInfo *Cpu0FI = MF.getInfo<Cpu0FunctionInfo>();

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, CC_Cpu0);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NextStackOffset = CCInfo.getNextStackOffset();

  // If this is the first call, create a stack frame object that points to
  // a location to which .cprestore saves $gp.
  if (IsPIC && Cpu0FI->globalBaseRegFixed() && !Cpu0FI->getGPFI())
    Cpu0FI->setGPFI(MFI->CreateFixedObject(4, 0, true));
  // Get the frame index of the stack frame object that points to the location
  // of dynamically allocated area on the stack.
  int DynAllocFI = Cpu0FI->getDynAllocFI();
  unsigned MaxCallFrameSize = Cpu0FI->getMaxCallFrameSize();

  if (MaxCallFrameSize < NextStackOffset) {
    Cpu0FI->setMaxCallFrameSize(NextStackOffset);

    // Set the offsets relative to $sp of the $gp restore slot and dynamically
    // allocated stack space. These offsets must be aligned to a boundary
    // determined by the stack alignment of the ABI.
    unsigned StackAlignment = TFL->getStackAlignment();
    NextStackOffset = (NextStackOffset + StackAlignment - 1) /
                      StackAlignment * StackAlignment;
    if (Cpu0FI->needGPSaveRestore())
      MFI->setObjectOffset(Cpu0FI->getGPFI(), NextStackOffset);

    MFI->setObjectOffset(DynAllocFI, NextStackOffset);
  }
  // Chain is the output chain of the last Load/Store or CopyToReg node.
  // ByValChain is the output chain of the last Memcpy node created for copying
  // byval arguments to the stack.
  SDValue Chain, CallSeqStart, ByValChain;
  SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
  Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
  ByValChain = InChain;

  // With EABI it is possible to have 16 args in registers.
  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    SDValue Arg = OutVals[i];
    CCValAssign &VA = ArgLocs[i];
    MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // ByVal Arg.
    if (Flags.isByVal()) {
      assert("!!!Error!!!, Flags.isByVal()==true");
      assert(Flags.getByValSize() &&
             "ByVal args of size 0 should have been ignored by front-end.");
      continue;
    }

    // Register arguments can't get to this point; only memory locations remain.
    assert(VA.isMemLoc());

    // Create the frame index object for this outgoing parameter.
    LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
                                    VA.getLocMemOffset(), true);
    SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());

    // Emit an ISD::STORE which stores the
    // parameter value to a stack location.
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0));
  }

  // Extend range of indices of frame objects for outgoing arguments that were
  // created during this function call. Skip this step if no such objects were
  // created.
  if (LastFI)
    Cpu0FI->extendOutArgFIRange(FirstFI, LastFI);

  // If a memcpy has been created to copy a byval arg to a stack, replace the
  // chain input of CallSeqStart with ByValChain.
  if (InChain != ByValChain)
    DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
                           NextStackOffsetVal);

  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  unsigned char OpFlag;
  bool IsPICCall = IsPIC; // true if calls are translated to jalr $25
  bool GlobalOrExternal = false;
  SDValue CalleeLo;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    OpFlag = IsPICCall ? Cpu0II::MO_GOT_CALL : Cpu0II::MO_NO_FLAG;
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                          getPointerTy(), 0, OpFlag);
    GlobalOrExternal = true;
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (!IsPIC) // static
      OpFlag = Cpu0II::MO_NO_FLAG;
    else // O32 & PIC
      OpFlag = Cpu0II::MO_GOT_CALL;
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
                                         OpFlag);
    GlobalOrExternal = true;
  }

  SDValue InFlag;

  // Create nodes that load the address of the callee and copy it to T9.
  if (IsPICCall) {
    if (GlobalOrExternal) {
      // Load callee address
      Callee = DAG.getNode(Cpu0ISD::Wrapper, dl, getPointerTy(),
                           GetGlobalReg(DAG, getPointerTy()), Callee);
      SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                                      Callee, MachinePointerInfo::getGOT(),
                                      false, false, false, 0);

      // Use GOT+LO if callee has internal linkage.
      if (CalleeLo.getNode()) {
        SDValue Lo = DAG.getNode(Cpu0ISD::Lo, dl, getPointerTy(), CalleeLo);
        Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo);
      } else
        Callee = LoadValue;
    }
  }

  // T9 should contain the address of the callee function if
  // -relocation-model=pic is used or it is an indirect call.
  if (IsPICCall || !GlobalOrExternal) {
    // copy to T9
    unsigned T9Reg = Cpu0::T9;
    Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
    InFlag = Chain.getValue(1);
    Callee = DAG.getRegister(T9Reg, getPointerTy());
  }

  // Cpu0JmpLink = #chain, #target_address, #opt_in_flags...
  //             = Chain, Callee, Reg#1, Reg#2, ...
  //
  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain  = DAG.getNode(Cpu0ISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(NextStackOffset, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}
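// The rounding expression above is the usual align-up idiom. As a standalone
// sketch (the helper name is ours, not the target's):
static unsigned RoundUpToAlignment(unsigned Offset, unsigned Align) {
  // e.g. RoundUpToAlignment(13, 8) == 16; assumes Align > 0.
  return (Offset + Align - 1) / Align * Align;
}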
Example #20
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
  // it in this generic function.
  if (Succ->isLandingPad())
    return NULL;

  MachineFunction *MF = getParent();
  DebugLoc dl;  // FIXME: this is nowhere

  // We may need to update this block's terminator, but we can't do that if
  // AnalyzeBranch fails. If this block uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return NULL;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jump to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return NULL;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
       Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(I))
          Indexes->removeMachineInstrFromMaps(I);
        Indexes->insertMachineInstrInMaps(I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(),e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
         E = Succ->livein_end(); I != E; ++I)
    NMBB->addLiveIn(*I);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may be
    // in one of two situations, depending on whether this block was the last in
    // the function. If the original block was the last in the function, all live
    // intervals will end prior to the beginning of the new split block. If the
    // original block was not at the end of the function, all live intervals will
    // extend to the end of the new split block.

    bool isLastMBB =
      llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
         I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI && "PHI sources should be live out of their predecessors.");
          LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeRange(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>()) {
    // Update dominator information.
    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);

    bool IsNewIDom = true;
    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
         PI != E; ++PI) {
      MachineBasicBlock *PredBB = *PI;
      if (PredBB == NMBB)
        continue;
      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
        IsNewIDom = false;
        break;
      }
    }

    // We know "this" dominates the newly created basic block.
    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);

    // If all the other predecessors of "Succ" are dominated by "Succ" itself
    // then the new block is the new immediate dominator of "Succ". Otherwise,
    // the new block doesn't dominate anything.
    if (IsNewIDom)
      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
  }

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If either block is not in a loop, the new block is not in one
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both blocks are in the same loop; NMBB joins that loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
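
// --- Illustrative sketch (not from the original source) ---
// A minimal standalone model of the immediate-dominator test above: after
// splitting the edge This -> Succ with the new block NMBB, NMBB becomes the
// immediate dominator of Succ exactly when every other predecessor of Succ
// is itself dominated by Succ. The ToyCFG type and its Dominates callback
// are assumptions made for illustration; they stand in for the CFG and
// MachineDominatorTree queries used above.
#include <functional>
#include <vector>

struct ToyCFG {
  std::vector<std::vector<int>> Preds;     // Preds[B] = predecessors of block B.
  std::function<bool(int, int)> Dominates; // Dominates(A, B): does A dominate B?
};

// Mirrors the IsNewIDom computation in the code above.
bool isNewImmediateDominator(const ToyCFG &G, int NMBB, int Succ) {
  for (int Pred : G.Preds[Succ]) {
    if (Pred == NMBB)
      continue; // The new block itself is skipped, as in the loop above.
    if (!G.Dominates(Succ, Pred))
      return false; // Some predecessor reaches Succ from outside.
  }
  return true;
}
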
Example #21
0
bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
  SmallVector<ReturnInst*,16> Returns;
  SmallVector<UnwindInst*,16> Unwinds;
  SmallVector<InvokeInst*,16> Invokes;

  // Look through the terminators of the basic blocks to find invokes, returns
  // and unwinds.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
      // Remember all return instructions in case we insert an invoke into this
      // function.
      Returns.push_back(RI);
    } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      Invokes.push_back(II);
    } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
      Unwinds.push_back(UI);
    }
  }
  // If we don't have any invokes or unwinds, there's nothing to do.
  if (Unwinds.empty() && Invokes.empty()) return false;

  // Find the eh.selector.*, eh.exception and alloca calls.
  //
  // Remember any allocas that aren't in the entry block, as the
  // jmpbuf saved SP will need to be updated for them.
  //
  // We'll use the first eh.selector to determine the right personality
  // function to use. For SJLJ, we always use the same personality for the
  // whole function, not on a per-selector basis.
  // FIXME: That's a bit ugly. Better way?
  SmallVector<CallInst*,16> EH_Selectors;
  SmallVector<CallInst*,16> EH_Exceptions;
  SmallVector<Instruction*,16> JmpbufUpdatePoints;
  // Note: Skip the entry block since there's nothing there that interests
  // us. eh.selector and eh.exception shouldn't ever be there, and we
  // want to disregard any allocas that are there.
  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (CallInst *CI = dyn_cast<CallInst>(I)) {
        if (CI->getCalledFunction() == SelectorFn) {
          if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
          EH_Selectors.push_back(CI);
        } else if (CI->getCalledFunction() == ExceptionFn) {
          EH_Exceptions.push_back(CI);
        } else if (CI->getCalledFunction() == StackRestoreFn) {
          JmpbufUpdatePoints.push_back(CI);
        }
      } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
        JmpbufUpdatePoints.push_back(AI);
      }
    }
  }
  // If we don't have any eh.selector calls, we can't determine the personality
  // function. Without a personality function, we can't process exceptions.
  if (!PersonalityFn) return false;

  NumInvokes += Invokes.size();
  NumUnwinds += Unwinds.size();

  if (!Invokes.empty()) {
    // We have invokes, so we need to add register/unregister calls to get
    // this function onto the global unwind stack.
    //
    // First thing we need to do is scan the whole function for values that are
    // live across unwind edges.  Each value that is live across an unwind edge
    // we spill into a stack location, guaranteeing that there is nothing live
    // across the unwind edge.  This process also splits all critical edges
    // coming out of invoke's.
    splitLiveRangesAcrossInvokes(Invokes);

    BasicBlock *EntryBB = F.begin();
    // Create an alloca for the incoming jump buffer ptr and the new jump buffer
    // that needs to be restored on all exits from the function.  This is an
    // alloca because the value needs to be added to the global context list.
    unsigned Align = 4; // FIXME: Should be a TLI check?
    AllocaInst *FunctionContext =
      new AllocaInst(FunctionContextTy, 0, Align,
                     "fcn_context", F.begin()->begin());

    Value *Idxs[2];
    const Type *Int32Ty = Type::getInt32Ty(F.getContext());
    Value *Zero = ConstantInt::get(Int32Ty, 0);
    // We need to also keep around a reference to the call_site field
    Idxs[0] = Zero;
    Idxs[1] = ConstantInt::get(Int32Ty, 1);
    CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
                                         "call_site",
                                         EntryBB->getTerminator());

    // The exception selector comes back in context->data[1]
    Idxs[1] = ConstantInt::get(Int32Ty, 2);
    Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
                                              "fc_data",
                                              EntryBB->getTerminator());
    Idxs[1] = ConstantInt::get(Int32Ty, 1);
    Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
                                                    "exc_selector_gep",
                                                    EntryBB->getTerminator());
    // The exception value comes back in context->data[0]
    Idxs[1] = Zero;
    Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
                                                     "exception_gep",
                                                     EntryBB->getTerminator());

    // The result of the eh.selector call will be replaced with a
    // reference to the selector value returned in the function
    // context. We leave the selector itself so the EH analysis later
    // can use it.
    for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
      CallInst *I = EH_Selectors[i];
      Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
      I->replaceAllUsesWith(SelectorVal);
    }
    // eh.exception calls are replaced with references to the proper
    // location in the context. Unlike eh.selector, the eh.exception
    // calls are removed entirely.
    for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
      CallInst *I = EH_Exceptions[i];
      // Possible for there to be duplicates, so check to make sure
      // the instruction hasn't already been removed.
      if (!I->getParent()) continue;
      Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
      const Type *Ty = Type::getInt8PtrTy(F.getContext());
      Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);

      I->replaceAllUsesWith(Val);
      I->eraseFromParent();
    }

    // The entry block changes to have the eh.sjlj.setjmp, with a conditional
    // branch to a dispatch block for non-zero returns. If we return normally,
    // we're not handling an exception and just register the function context
    // and continue.

    // Create the dispatch block.  The dispatch block is basically a big switch
    // statement that goes to all of the invoke landing pads.
    BasicBlock *DispatchBlock =
            BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);

    // Insert a load in the Catch block, and a switch on its value.  By default,
    // we go to a block that just does an unwind (which is the correct action
    // for a standard call).
    BasicBlock *UnwindBlock =
      BasicBlock::Create(F.getContext(), "unwindbb", &F);
    Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));

    Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
                                       DispatchBlock);
    SwitchInst *DispatchSwitch =
      SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
                         DispatchBlock);
    // Split the entry block to insert the conditional branch for the setjmp.
    BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
                                                     "eh.sjlj.setjmp.cont");

    // Populate the Function Context
    //   1. LSDA address
    //   2. Personality function address
    //   3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)

    // LSDA address
    Idxs[0] = Zero;
    Idxs[1] = ConstantInt::get(Int32Ty, 4);
    Value *LSDAFieldPtr =
      GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
                                "lsda_gep",
                                EntryBB->getTerminator());
    Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
                                   EntryBB->getTerminator());
    new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());

    Idxs[1] = ConstantInt::get(Int32Ty, 3);
    Value *PersonalityFieldPtr =
      GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
                                "lsda_gep",
                                EntryBB->getTerminator());
    new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
                  EntryBB->getTerminator());

    // Save the frame pointer.
    Idxs[1] = ConstantInt::get(Int32Ty, 5);
    Value *JBufPtr
      = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
                                  "jbuf_gep",
                                  EntryBB->getTerminator());
    Idxs[1] = ConstantInt::get(Int32Ty, 0);
    Value *FramePtr =
      GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
                                EntryBB->getTerminator());

    Value *Val = CallInst::Create(FrameAddrFn,
                                  ConstantInt::get(Int32Ty, 0),
                                  "fp",
                                  EntryBB->getTerminator());
    new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());

    // Save the stack pointer.
    Idxs[1] = ConstantInt::get(Int32Ty, 2);
    Value *StackPtr =
      GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
                                EntryBB->getTerminator());

    Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
    new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());

    // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
    Value *SetjmpArg =
      CastInst::Create(Instruction::BitCast, JBufPtr,
                       Type::getInt8PtrTy(F.getContext()), "",
                       EntryBB->getTerminator());
    Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
                                          "dispatch",
                                          EntryBB->getTerminator());
    // check the return value of the setjmp. non-zero goes to dispatcher.
    Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
                                   ICmpInst::ICMP_EQ, DispatchVal, Zero,
                                   "notunwind");
    // Nuke the uncond branch.
    EntryBB->getTerminator()->eraseFromParent();

    // Put in a new condbranch in its place.
    BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);

    // Register the function context and make sure it's known to not throw
    CallInst *Register =
      CallInst::Create(RegisterFn, FunctionContext, "",
                       ContBlock->getTerminator());
    Register->setDoesNotThrow();

    // At this point, we are all set up, update the invoke instructions
    // to mark their call_site values, and fill in the dispatch switch
    // accordingly.
    for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
      markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);

    // Mark call instructions that aren't nounwind as no-action
    // (call_site == -1). Skip the entry block: before the setjmp, no function
    // context has been created for this function, so any unexpected exceptions
    // thrown there go directly to the caller's context, which is what we want
    // anyway; no need to do anything here.
    for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
      for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          // Ignore calls to the EH builtins (eh.selector, eh.exception)
          Constant *Callee = CI->getCalledFunction();
          if (Callee != SelectorFn && Callee != ExceptionFn
              && !CI->doesNotThrow())
            insertCallSiteStore(CI, -1, CallSite);
        }
    }

    // Replace all unwinds with a branch to the unwind handler.
    // ??? Should this ever happen with sjlj exceptions?
    for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
      BranchInst::Create(UnwindBlock, Unwinds[i]);
      Unwinds[i]->eraseFromParent();
    }

    // After any allocas not in the entry block, update the saved SP
    // in the jmpbuf to the new value.
    for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
      Instruction *AI = JmpbufUpdatePoints[i];
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(AI);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }

    // Finally, for any returns from this function, if this function contains an
    // invoke, add a call to unregister the function context.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i)
      CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
  }

  return true;
}
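
// --- Illustrative sketch (not from the original source) ---
// A hedged reconstruction of the function-context layout that the GEP
// indices above assume: field 1 holds call_site, field 2 the data array
// (data[0] = exception value, data[1] = selector), field 3 the personality
// function, field 4 the LSDA address, and field 5 the jmpbuf (jbuf[0] = FP,
// jbuf[2] = SP). The struct below is inferred from those indices for
// illustration only; it is not copied from any runtime header.
struct SjLjFunctionContextLayout {
  void *Prev;        // field 0: link for the per-thread unwind stack
  int CallSite;      // field 1: written by insertCallSiteStore and
                     //          markInvokeCallSite
  int Data[4];       // field 2: Data[0] = exception, Data[1] = selector
  void *Personality; // field 3: personality function address
  void *LSDA;        // field 4: LSDA address
  void *JBuf[5];     // field 5: JBuf[0] = frame ptr, JBuf[2] = stack ptr;
                     //          eh.sjlj.setjmp fills in the rest
};
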
void
UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
                      const SmallVectorImpl<SlotIndex> &Kills,
                      SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
                      MachineRegisterInfo &MRI, LiveIntervals &LIS) {
  if (Kills.empty())
    return;
  // Don't track copies from physregs, there are too many uses.
  if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
    return;

  // Collect all the (vreg, valno) pairs that are copies of LI.
  SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
  for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) {
    MachineInstr *MI = MO.getParent();
    // Copies of the full value.
    if (MO.getSubReg() || !MI->isCopy())
      continue;
    unsigned DstReg = MI->getOperand(0).getReg();

    // Don't follow copies to physregs. These are usually setting up call
    // arguments, and the argument registers are always call clobbered. We are
    // better off in the source register which could be a callee-saved register,
    // or it could be spilled.
    if (!TargetRegisterInfo::isVirtualRegister(DstReg))
      continue;

    // Is LocNo extended to reach this copy? If not, another def may be blocking
    // it, or we are looking at a wrong value of LI.
    SlotIndex Idx = LIS.getInstructionIndex(MI);
    LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
    if (!I.valid() || I.value() != LocNo)
      continue;

    if (!LIS.hasInterval(DstReg))
      continue;
    LiveInterval *DstLI = &LIS.getInterval(DstReg);
    const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
    assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
    CopyValues.push_back(std::make_pair(DstLI, DstVNI));
  }

  if (CopyValues.empty())
    return;

  DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');

  // Try to add defs of the copied values for each kill point.
  for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
    SlotIndex Idx = Kills[i];
    for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
      LiveInterval *DstLI = CopyValues[j].first;
      const VNInfo *DstVNI = CopyValues[j].second;
      if (DstLI->getVNInfoAt(Idx) != DstVNI)
        continue;
      // Check that there isn't already a def at Idx
      LocMap::iterator I = locInts.find(Idx);
      if (I.valid() && I.start() <= Idx)
        continue;
      DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
                   << DstVNI->id << " in " << *DstLI << '\n');
      MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
      assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
      unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
      I.insert(Idx, Idx.getNextSlot(), LocNo);
      NewDefs.push_back(std::make_pair(Idx, LocNo));
      break;
    }
  }
}
Example #23
0
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  SkipThreshold = SkipThresholdFlag;

  bool HaveKill = false;
  bool MadeChange = false;

  // Track depth of exec mask, divergent branches.
  SmallVector<MachineBasicBlock *, 16> ExecBranchStack;

  MachineFunction::iterator NextBB;

  MachineBasicBlock *EmptyMBBAtEnd = nullptr;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock &MBB = *BI;
    bool HaveSkipBlock = false;

    if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
      // Reached convergence point for last divergent branch.
      ExecBranchStack.pop_back();
    }

    if (HaveKill && ExecBranchStack.empty()) {
      HaveKill = false;

      // TODO: Insert skip if exec is 0?
    }

    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);

      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      case AMDGPU::SI_MASK_BRANCH:
        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
        MadeChange |= skipMaskBranch(MI, MBB);
        break;

      case AMDGPU::S_BRANCH:
        // Optimize out branches to the next block.
        // FIXME: Shouldn't this be handled by BranchFolding?
        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
          MI.eraseFromParent();
        } else if (HaveSkipBlock) {
          // Remove the unconditional branch when a skip block has been
          // inserted after the current block, so that execution skips the
          // kill instructions when the exec mask is non-zero.
          MI.eraseFromParent();
        }
        break;

      case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
      case AMDGPU::SI_KILL_I1_TERMINATOR:
        MadeChange = true;
        kill(MI);

        if (ExecBranchStack.empty()) {
          if (skipIfDead(MI, *NextBB)) {
            HaveSkipBlock = true;
            NextBB = std::next(BI);
            BE = MF.end();
          }
        } else {
          HaveKill = true;
        }

        MI.eraseFromParent();
        break;

      case AMDGPU::SI_RETURN_TO_EPILOG:
        // FIXME: Should move somewhere else
        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());

        // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
        // because external bytecode will be appended at the end.
        if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
          // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
          // the end and jump there.
          if (!EmptyMBBAtEnd) {
            EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
            MF.insert(MF.end(), EmptyMBBAtEnd);
          }

          MBB.addSuccessor(EmptyMBBAtEnd);
          BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
            .addMBB(EmptyMBBAtEnd);
          I->eraseFromParent();
        }
        break;

      default:
        break;
      }
    }
  }

  return MadeChange;
}
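
// --- Illustrative sketch (not from the original source) ---
// A reduced model of the S_BRANCH case above: walk a toy instruction stream
// and delete unconditional branches whose target is the next block in layout
// order. The block and branch encodings are assumptions for illustration.
#include <vector>

struct ToySkipInst {
  bool IsUncondBranch;
  int Target; // Target block id; meaningful only for branches.
};

struct ToySkipBlock {
  int Id;
  std::vector<ToySkipInst> Insts;
};

// Blocks are given in layout order; returns true if anything was erased.
bool eraseBranchesToLayoutSuccessor(std::vector<ToySkipBlock> &Blocks) {
  bool Changed = false;
  for (size_t i = 0; i + 1 < Blocks.size(); ++i) {
    std::vector<ToySkipInst> &Insts = Blocks[i].Insts;
    for (size_t j = 0; j < Insts.size();) {
      if (Insts[j].IsUncondBranch && Insts[j].Target == Blocks[i + 1].Id) {
        Insts.erase(Insts.begin() + j); // Branch to the fall-through block.
        Changed = true;
      } else {
        ++j;
      }
    }
  }
  return Changed;
}
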
static unsigned scanFromBlock(const CFGBlock *Start,
                              llvm::BitVector &Reachable,
                              Preprocessor *PP,
                              bool IncludeSometimesUnreachableEdges) {
  unsigned count = 0;

  // Prep work queue
  SmallVector<const CFGBlock*, 32> WL;

  // The entry block may have already been marked reachable
  // by the caller.
  if (!Reachable[Start->getBlockID()]) {
    ++count;
    Reachable[Start->getBlockID()] = true;
  }

  WL.push_back(Start);

  // Find the reachable blocks from 'Start'.
  while (!WL.empty()) {
    const CFGBlock *item = WL.pop_back_val();

    // There are cases where we want to treat all successors as reachable.
    // The idea is that some "sometimes unreachable" code is not interesting,
    // and that we should forge ahead and explore those branches anyway.
    // This allows us to potentially uncover some "always unreachable" code
    // within the "sometimes unreachable" code.
    // Look at the successors and mark them reachable.
    Optional<bool> TreatAllSuccessorsAsReachable;
    if (!IncludeSometimesUnreachableEdges)
      TreatAllSuccessorsAsReachable = false;

    for (CFGBlock::const_succ_iterator I = item->succ_begin(),
         E = item->succ_end(); I != E; ++I) {
      const CFGBlock *B = *I;
      if (!B) do {
        const CFGBlock *UB = I->getPossiblyUnreachableBlock();
        if (!UB)
          break;

        if (!TreatAllSuccessorsAsReachable.hasValue()) {
          assert(PP);
          TreatAllSuccessorsAsReachable =
            shouldTreatSuccessorsAsReachable(item, *PP);
        }

        if (TreatAllSuccessorsAsReachable.getValue()) {
          B = UB;
          break;
        }
      } while (false);

      if (B) {
        unsigned blockID = B->getBlockID();
        if (!Reachable[blockID]) {
          Reachable.set(blockID);
          WL.push_back(B);
          ++count;
        }
      }
    }
  }
  return count;
}
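
// --- Illustrative sketch (not from the original source) ---
// The same worklist scheme in a self-contained form, stripped of the
// CFGBlock/Preprocessor machinery: a stack-based reachability scan that
// counts newly marked blocks. The adjacency-list CFG is an assumption made
// for illustration.
#include <vector>

unsigned scanFromBlockToy(int Start,
                          const std::vector<std::vector<int>> &Succs,
                          std::vector<bool> &Reachable) {
  unsigned Count = 0;
  std::vector<int> WL;

  // The caller may have marked the start block reachable already.
  if (!Reachable[Start]) {
    Reachable[Start] = true;
    ++Count;
  }
  WL.push_back(Start);

  while (!WL.empty()) {
    int Item = WL.back();
    WL.pop_back();
    for (int B : Succs[Item]) {
      if (!Reachable[B]) { // Mark and enqueue each new block exactly once.
        Reachable[B] = true;
        WL.push_back(B);
        ++Count;
      }
    }
  }
  return Count;
}
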
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
                                 bool IsSimple,
                                 MachineFunction &MF,
                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                 SmallVector<MachineInstr*, 16> &Copies) {
    DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');

    DenseSet<unsigned> UsedByPhi;
    getRegsUsedByPHIs(*TailBB, &UsedByPhi);

    if (IsSimple)
        return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);

    // Iterate through all the unique predecessors and tail-duplicate this
    // block into them, if possible. Copying the list ahead of time also
    // avoids trouble with the predecessor list reallocating.
    bool Changed = false;
    SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
            TailBB->pred_end());
    for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
            PE = Preds.end(); PI != PE; ++PI) {
        MachineBasicBlock *PredBB = *PI;

        assert(TailBB != PredBB &&
               "Single-block loop should have been rejected earlier!");
        // EH edges are ignored by AnalyzeBranch.
        if (PredBB->succ_size() > 1)
            continue;

        MachineBasicBlock *PredTBB, *PredFBB;
        SmallVector<MachineOperand, 4> PredCond;
        if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
            continue;
        if (!PredCond.empty())
            continue;
        // Don't duplicate into a fall-through predecessor (at least for now).
        if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
            continue;

        DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
              << "From Succ: " << *TailBB);

        TDBBs.push_back(PredBB);

        // Remove PredBB's unconditional branch.
        TII->RemoveBranch(*PredBB);

        if (RS && !TailBB->livein_empty()) {
            // Update PredBB livein.
            RS->enterBasicBlock(PredBB);
            if (!PredBB->empty())
                RS->forward(prior(PredBB->end()));
            BitVector RegsLiveAtExit(TRI->getNumRegs());
            RS->getRegsUsed(RegsLiveAtExit, false);
            for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
                    E = TailBB->livein_end(); I != E; ++I) {
                if (!RegsLiveAtExit[*I])
                    // If a register is previously livein to the tail but it's not live
                    // at the end of predecessor BB, then it should be added to its
                    // livein list.
                    PredBB->addLiveIn(*I);
            }
        }

        // Clone the contents of TailBB into PredBB.
        DenseMap<unsigned, unsigned> LocalVRMap;
        SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
        // Use instr_iterator here to properly handle bundles, e.g.
        // ARM Thumb2 IT block.
        MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
        while (I != TailBB->instr_end()) {
            MachineInstr *MI = &*I;
            ++I;
            if (MI->isPHI()) {
                // Replace the uses of the def of the PHI with the register coming
                // from PredBB.
                ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
            } else {
                // Replace def of virtual registers with new registers, and update
                // uses with PHI source register or the new registers.
                DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
            }
        }
        MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
        for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
            Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                                     TII->get(TargetOpcode::COPY),
                                     CopyInfos[i].first).addReg(CopyInfos[i].second));
        }

        // Simplify
        TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);

        NumInstrDups += TailBB->size() - 1; // subtract one for removed branch

        // Update the CFG.
        PredBB->removeSuccessor(PredBB->succ_begin());
        assert(PredBB->succ_empty() &&
               "TailDuplicate called on block with multiple successors!");
        for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
                E = TailBB->succ_end(); I != E; ++I)
            PredBB->addSuccessor(*I);

        Changed = true;
        ++NumTailDups;
    }

    // If TailBB was duplicated into all its predecessors except for the prior
    // block, which falls through unconditionally, move the contents of this
    // block into the prior block.
    MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
    MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
    SmallVector<MachineOperand, 4> PriorCond;
    // This has to check PrevBB->succ_size() because EH edges are ignored by
    // AnalyzeBranch.
    if (PrevBB->succ_size() == 1 &&
            !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
            PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
            !TailBB->hasAddressTaken()) {
        DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
              << "From MBB: " << *TailBB);
        if (PreRegAlloc) {
            DenseMap<unsigned, unsigned> LocalVRMap;
            SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
            MachineBasicBlock::iterator I = TailBB->begin();
            // Process PHI instructions first.
            while (I != TailBB->end() && I->isPHI()) {
                // Replace the uses of the def of the PHI with the register coming
                // from PredBB.
                MachineInstr *MI = &*I++;
                ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
                if (MI->getParent())
                    MI->eraseFromParent();
            }

            // Now copy the non-PHI instructions.
            while (I != TailBB->end()) {
                // Replace def of virtual registers with new registers, and update
                // uses with PHI source register or the new registers.
                MachineInstr *MI = &*I++;
                assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
                DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
                MI->eraseFromParent();
            }
            MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
            for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
                Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
                                         TII->get(TargetOpcode::COPY),
                                         CopyInfos[i].first)
                                 .addReg(CopyInfos[i].second));
            }
        } else {
            // No PHIs to worry about, just splice the instructions over.
            PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
        }
        PrevBB->removeSuccessor(PrevBB->succ_begin());
        assert(PrevBB->succ_empty());
        PrevBB->transferSuccessors(TailBB);
        TDBBs.push_back(PrevBB);
        Changed = true;
    }

    // If this is after register allocation, there are no phis to fix.
    if (!PreRegAlloc)
        return Changed;

    // If we made no changes so far, we are safe.
    if (!Changed)
        return Changed;


    // Handle the nasty case in which we duplicated a block that is part of a loop
    // into some but not all of its predecessors. For example:
    //    1 -> 2 <-> 3                 |
    //          \                      |
    //           \---> rest            |
    // if we duplicate 2 into 1 but not into 3, we end up with
    // 12 -> 3 <-> 2 -> rest           |
    //   \             /               |
    //    \----->-----/                |
    // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
    // with a phi in 3 (which now dominates 2).
    // What we do here is introduce a copy in 3 of the register defined by the
    // phi, just like when we are duplicating 2 into 3, but we don't copy any
    // real instructions or remove the 3 -> 2 edge from the phi in 2.
    for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
            PE = Preds.end(); PI != PE; ++PI) {
        MachineBasicBlock *PredBB = *PI;
        if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
            continue;

        // EH edges
        if (PredBB->succ_size() != 1)
            continue;

        DenseMap<unsigned, unsigned> LocalVRMap;
        SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
        MachineBasicBlock::iterator I = TailBB->begin();
        // Process PHI instructions first.
        while (I != TailBB->end() && I->isPHI()) {
            // Replace the uses of the def of the PHI with the register coming
            // from PredBB.
            MachineInstr *MI = &*I++;
            ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
        }
        MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
        for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
            Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                                     TII->get(TargetOpcode::COPY),
                                     CopyInfos[i].first).addReg(CopyInfos[i].second));
        }
    }

    return Changed;
}
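
// --- Illustrative sketch (not from the original source) ---
// A toy rendering of the core duplication move above: for each predecessor
// whose only successor is the tail block, clone the tail's instructions into
// the predecessor and retarget its successor edges. Blocks are plain
// instruction-name vectors; PHI rewriting, register renaming, and the
// fallthrough merge handled above are deliberately omitted.
#include <string>
#include <vector>

struct ToyTDBlock {
  std::vector<std::string> Insts; // Block body, terminator excluded.
  std::vector<int> Succs;         // Successor block ids.
};

// Returns how many predecessors the tail block was duplicated into.
unsigned tailDuplicateToy(std::vector<ToyTDBlock> &Blocks, int Tail,
                          const std::vector<int> &Preds) {
  unsigned NumDups = 0;
  for (int P : Preds) {
    if (P == Tail)
      continue; // Single-block loops are rejected, as asserted above.
    ToyTDBlock &PredBB = Blocks[P];
    if (PredBB.Succs.size() != 1 || PredBB.Succs[0] != Tail)
      continue; // Mirrors the succ_size()/AnalyzeBranch screening above.
    const ToyTDBlock &TailBB = Blocks[Tail];
    PredBB.Insts.insert(PredBB.Insts.end(), TailBB.Insts.begin(),
                        TailBB.Insts.end());
    PredBB.Succs = TailBB.Succs; // PredBB now ends where the tail ended.
    ++NumDups;
  }
  return NumDups;
}
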
static bool markAliveBlocks(BasicBlock *BB,
                            SmallPtrSet<BasicBlock*, 128> &Reachable) {

  SmallVector<BasicBlock*, 128> Worklist;
  Worklist.push_back(BB);
  bool Changed = false;
  do {
    BB = Worklist.pop_back_val();

    if (!Reachable.insert(BB))
      continue;

    // Do a quick scan of the basic block, turning any obviously unreachable
    // instructions into LLVM unreachable insts.  The instruction combining pass
    // canonicalizes unreachable insts into stores to null or undef.
    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
      if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
        if (CI->doesNotReturn()) {
          // If we found a call to a no-return function, insert an unreachable
          // instruction after it.  Make sure there isn't *already* one there
          // though.
          ++BBI;
          if (!isa<UnreachableInst>(BBI)) {
            // Don't insert a call to llvm.trap right before the unreachable.
            changeToUnreachable(BBI, false);
            Changed = true;
          }
          break;
        }
      }

      // Store to undef and store to null are undefined and used to signal that
      // they should be changed to unreachable by passes that can't modify the
      // CFG.
      if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
        // Don't touch volatile stores.
        if (SI->isVolatile()) continue;

        Value *Ptr = SI->getOperand(1);

        if (isa<UndefValue>(Ptr) ||
            (isa<ConstantPointerNull>(Ptr) &&
             SI->getPointerAddressSpace() == 0)) {
          changeToUnreachable(SI, true);
          Changed = true;
          break;
        }
      }
    }

    // Turn invokes that call 'nounwind' functions into ordinary calls.
    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      Value *Callee = II->getCalledValue();
      if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
        changeToUnreachable(II, true);
        Changed = true;
      } else if (II->doesNotThrow()) {
        if (II->use_empty() && II->onlyReadsMemory()) {
          // jump to the normal destination branch.
          BranchInst::Create(II->getNormalDest(), II);
          II->getUnwindDest()->removePredecessor(II->getParent());
          II->eraseFromParent();
        } else
          changeToCall(II);
        Changed = true;
      }
    }

    Changed |= ConstantFoldTerminator(BB, true);
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
      Worklist.push_back(*SI);
  } while (!Worklist.empty());
  return Changed;
}
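
// --- Illustrative sketch (not from the original source) ---
// A reduced model of the no-return screening above: once a call that is
// known not to return is seen, everything after it in the block is dead and
// the block can be cut off at that point. The ToyIRInst encoding is an
// assumption made for illustration.
#include <vector>

struct ToyIRInst {
  bool IsCall;
  bool NoReturn;
};

// Returns true if the block was truncated after a no-return call.
bool truncateAfterNoReturnCall(std::vector<ToyIRInst> &Block) {
  for (size_t i = 0; i < Block.size(); ++i) {
    if (Block[i].IsCall && Block[i].NoReturn && i + 1 < Block.size()) {
      Block.resize(i + 1); // Drop the unreachable tail of the block.
      return true;
    }
  }
  return false;
}
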
Example #27
0
void MachineBasicBlock::updateTerminator() {
  const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
  // A block with no successors has no concerns with fall-through edges.
  if (this->succ_empty()) return;

  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  DebugLoc dl;  // FIXME: this is nowhere
  bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
  (void) B;
  assert(!B && "UpdateTerminators requires analyzable predecessors!");
  if (Cond.empty()) {
    if (TBB) {
      // The block has an unconditional branch. If its successor is now
      // its layout successor, delete the branch.
      if (isLayoutSuccessor(TBB))
        TII->RemoveBranch(*this);
    } else {
      // The block has an unconditional fallthrough. If its successor is not
      // its layout successor, insert a branch. First we have to locate the
      // only non-landing-pad successor, as that is the fallthrough block.
      for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
        if ((*SI)->isLandingPad())
          continue;
        assert(!TBB && "Found more than one non-landing-pad successor!");
        TBB = *SI;
      }

      // If there is no non-landing-pad successor, the block has no
      // fall-through edges to be concerned with.
      if (!TBB)
        return;

      // Finally update the unconditional successor to be reached via a branch
      // if it would not be reached by fallthrough.
      if (!isLayoutSuccessor(TBB))
        TII->InsertBranch(*this, TBB, 0, Cond, dl);
    }
  } else {
    if (FBB) {
      // The block has a non-fallthrough conditional branch. If one of its
      // successors is its layout successor, rewrite it to a fallthrough
      // conditional branch.
      if (isLayoutSuccessor(TBB)) {
        if (TII->ReverseBranchCondition(Cond))
          return;
        TII->RemoveBranch(*this);
        TII->InsertBranch(*this, FBB, 0, Cond, dl);
      } else if (isLayoutSuccessor(FBB)) {
        TII->RemoveBranch(*this);
        TII->InsertBranch(*this, TBB, 0, Cond, dl);
      }
    } else {
      // Walk through the successors and find the successor which is not
      // a landing pad and is not the conditional branch destination (in TBB)
      // as the fallthrough successor.
      MachineBasicBlock *FallthroughBB = 0;
      for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
        if ((*SI)->isLandingPad() || *SI == TBB)
          continue;
        assert(!FallthroughBB && "Found more than one fallthrough successor.");
        FallthroughBB = *SI;
      }
      if (!FallthroughBB && canFallThrough()) {
        // We fallthrough to the same basic block as the conditional jump
        // targets. Remove the conditional jump, leaving unconditional
        // fallthrough.
        // FIXME: This does not seem like a reasonable pattern to support, but it
        // has been seen in the wild coming out of degenerate ARM test cases.
        TII->RemoveBranch(*this);

        // Finally update the unconditional successor to be reached via a branch
        // if it would not be reached by fallthrough.
        if (!isLayoutSuccessor(TBB))
          TII->InsertBranch(*this, TBB, 0, Cond, dl);
        return;
      }

      // The block has a fallthrough conditional branch.
      if (isLayoutSuccessor(TBB)) {
        if (TII->ReverseBranchCondition(Cond)) {
          // We can't reverse the condition, add an unconditional branch.
          Cond.clear();
          TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
          return;
        }
        TII->RemoveBranch(*this);
        TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
      } else if (!isLayoutSuccessor(FallthroughBB)) {
        TII->RemoveBranch(*this);
        TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
      }
    }
  }
}
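
// --- Illustrative sketch (not from the original source) ---
// The decision table implemented above, collapsed into a standalone
// classifier. CondEmpty/HasTBB/HasFBB correspond to the AnalyzeBranch
// outputs; the *IsLayoutSucc flags say whether the branch target (or, in the
// fallthrough case, the unique non-landing-pad successor) is the layout
// successor. The enum names are assumptions, and several subcases above
// (an unreversible condition, or a fallthrough equal to the branch target)
// are deliberately folded away.
enum class TermAction {
  None,             // Layout already matches the terminator.
  RemoveBranch,     // Unconditional branch to the layout successor.
  InsertBranch,     // Fallthrough (or false edge) needs an explicit branch.
  ReverseCondition, // Taken side falls through; flip and rebranch.
};

TermAction classifyTerminator(bool CondEmpty, bool HasTBB, bool HasFBB,
                              bool TBBIsLayoutSucc, bool FBBIsLayoutSucc) {
  if (CondEmpty) {
    if (HasTBB) // Explicit unconditional branch.
      return TBBIsLayoutSucc ? TermAction::RemoveBranch : TermAction::None;
    // Pure fallthrough: branch only if the successor moved away.
    return TBBIsLayoutSucc ? TermAction::None : TermAction::InsertBranch;
  }
  if (HasFBB) { // Two-way conditional branch.
    if (TBBIsLayoutSucc)
      return TermAction::ReverseCondition;
    if (FBBIsLayoutSucc)
      return TermAction::InsertBranch; // Rewritten to branch only on TBB.
    return TermAction::None;
  }
  // Fallthrough conditional branch; the code above additionally handles the
  // degenerate fallthrough-equals-target case and unreversible conditions.
  if (TBBIsLayoutSucc)
    return TermAction::ReverseCondition;
  return FBBIsLayoutSucc ? TermAction::None : TermAction::InsertBranch;
}
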
RequirementCheckResult TypeChecker::checkGenericArguments(
    DeclContext *dc, SourceLoc loc, SourceLoc noteLoc, Type owner,
    GenericSignature *genericSig, TypeSubstitutionFn substitutions,
    LookupConformanceFn conformances,
    UnsatisfiedDependency *unsatisfiedDependency,
    ConformanceCheckOptions conformanceOptions,
    GenericRequirementsCheckListener *listener,
    SubstOptions options) {
  bool valid = true;

  struct RequirementSet {
    ArrayRef<Requirement> Requirements;
    SmallVector<ParentConditionalConformance, 4> Parents;
  };

  SmallVector<RequirementSet, 8> pendingReqs;
  pendingReqs.push_back({genericSig->getRequirements(), {}});

  while (!pendingReqs.empty()) {
    auto current = pendingReqs.pop_back_val();

    for (const auto &rawReq : current.Requirements) {
      auto req = rawReq;
      if (current.Parents.empty()) {
        auto substed = rawReq.subst(substitutions, conformances, options);
        if (!substed) {
          // Another requirement will fail later; just continue.
          valid = false;
          continue;
        }

        req = *substed;
      }

      auto kind = req.getKind();
      Type rawFirstType = rawReq.getFirstType();
      Type firstType = req.getFirstType();
      Type rawSecondType, secondType;
      if (kind != RequirementKind::Layout) {
        rawSecondType = rawReq.getSecondType();
        secondType = req.getSecondType();
      }

      bool requirementFailure = false;
      if (listener && !listener->shouldCheck(kind, firstType, secondType))
        continue;

      Diag<Type, Type, Type> diagnostic;
      Diag<Type, Type, StringRef> diagnosticNote;

      switch (kind) {
      case RequirementKind::Conformance: {
        // Protocol conformance requirements.
        auto proto = secondType->castTo<ProtocolType>();
        // FIXME: This should track whether this should result in a private
        // or non-private dependency.
        // FIXME: Do we really need "used" at this point?
        // FIXME: Poor location information. How much better can we do here?
        // FIXME: This call should support the listener so it can properly
        //        diagnose problems with conformances.
        auto result =
            conformsToProtocol(firstType, proto->getDecl(), dc,
                               conformanceOptions, loc, unsatisfiedDependency);

        // Unsatisfied dependency case.
        auto status = result.getStatus();
        switch (status) {
        case RequirementCheckResult::Failure:
          // A failure at the top level is diagnosed elsewhere.
          if (current.Parents.empty())
            return status;

          diagnostic = diag::type_does_not_conform_owner;
          diagnosticNote = diag::type_does_not_inherit_or_conform_requirement;
          requirementFailure = true;
          break;
        case RequirementCheckResult::UnsatisfiedDependency:
        case RequirementCheckResult::SubstitutionFailure:
          // pass it on up.
          return status;
        case RequirementCheckResult::Success: {
          auto conformance = result.getConformance();
          // Report the conformance.
          if (listener && valid && current.Parents.empty()) {
            listener->satisfiedConformance(rawFirstType, firstType,
                                           conformance);
          }

          auto conditionalReqs = conformance.getConditionalRequirements();
          if (!conditionalReqs.empty()) {
            auto history = current.Parents;
            history.push_back({firstType, proto});
            pendingReqs.push_back({conditionalReqs, std::move(history)});
          }
          continue;
        }
        }

        // Failure needs to emit a diagnostic.
        break;
      }

      case RequirementKind::Layout: {
        // TODO: Statically check if the first type
        // conforms to the layout constraint, once we
        // support such static checks.
        continue;
      }

      case RequirementKind::Superclass:
        // Superclass requirements.
        if (!isSubclassOf(firstType, secondType, dc)) {
          diagnostic = diag::type_does_not_inherit;
          diagnosticNote = diag::type_does_not_inherit_or_conform_requirement;
          requirementFailure = true;
        }
        break;

      case RequirementKind::SameType:
        if (!firstType->isEqual(secondType)) {
          diagnostic = diag::types_not_equal;
          diagnosticNote = diag::types_not_equal_requirement;
          requirementFailure = true;
        }
        break;
      }

      if (!requirementFailure)
        continue;

      if (listener &&
          listener->diagnoseUnsatisfiedRequirement(rawReq, firstType,
                                                   secondType, current.Parents))
        return RequirementCheckResult::Failure;

      if (loc.isValid()) {
        // FIXME: Poor source-location information.
        diagnose(loc, diagnostic, owner, firstType, secondType);
        diagnose(noteLoc, diagnosticNote, rawFirstType, rawSecondType,
                 genericSig->gatherGenericParamBindingsText(
                     {rawFirstType, rawSecondType}, substitutions));

        ParentConditionalConformance::diagnoseConformanceStack(Diags, noteLoc,
                                                               current.Parents);
      }

      return RequirementCheckResult::Failure;
    }
  }

  if (valid)
    return RequirementCheckResult::Success;
  return RequirementCheckResult::SubstitutionFailure;
}
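
// --- Illustrative sketch (not from the original source) ---
// A self-contained model of the pending-requirements scheme above: each
// worklist entry carries the chain of parent conformances it is conditional
// on, so conditional requirements discovered while checking one conformance
// are queued with an extended history instead of being checked recursively.
// ToyRequirement and the Check callback are assumptions for illustration.
#include <string>
#include <utility>
#include <vector>

struct ToyRequirement {
  std::string Subject;
  std::string Constraint;
};

struct ToyRequirementSet {
  std::vector<ToyRequirement> Requirements;
  std::vector<std::string> Parents; // Conformances this set is conditional on.
};

// Check(Req, Parents, CondOut) returns false on failure and fills CondOut
// with any conditional requirements of a satisfied conformance.
template <typename CheckFn>
bool checkAllRequirements(std::vector<ToyRequirement> TopLevel, CheckFn Check) {
  std::vector<ToyRequirementSet> Pending;
  Pending.push_back({std::move(TopLevel), {}});

  while (!Pending.empty()) {
    ToyRequirementSet Current = std::move(Pending.back());
    Pending.pop_back();

    for (const ToyRequirement &Req : Current.Requirements) {
      std::vector<ToyRequirement> Conditional;
      if (!Check(Req, Current.Parents, Conditional))
        return false; // An unsatisfied requirement fails the whole check.
      if (!Conditional.empty()) {
        std::vector<std::string> History = Current.Parents;
        History.push_back(Req.Subject + ": " + Req.Constraint);
        Pending.push_back({std::move(Conditional), std::move(History)});
      }
    }
  }
  return true;
}
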
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();
  
  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get the personality function from the callee if it contains a landing pad.
  Value *CalleePersonality = 0;
  for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
       I != E; ++I)
    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
      const BasicBlock *BB = II->getUnwindDest();
      const LandingPadInst *LP = BB->getLandingPadInst();
      CalleePersonality = LP->getPersonalityFn();
      break;
    }

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  if (CalleePersonality) {
    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
         I != E; ++I)
      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
        const BasicBlock *BB = II->getUnwindDest();
        const LandingPadInst *LP = BB->getLandingPadInst();

        // If the personality functions match, then we can perform the
        // inlining. Otherwise, we can't inline.
        // TODO: This isn't 100% true. Some personality functions are proper
        //       supersets of others and can be used in place of the other.
        if (LP->getPersonalityFn() != CalleePersonality)
          return false;

        break;
      }
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CS.isByValArgument(ArgNo)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));
 
        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;
      
      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;
      
      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);
      
      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static alloca's, scoping them to the
  // function we just inlined.
  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      builder.CreateLifetimeStart(AI);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

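  // If the callee had multiple return points, merge the returned values with
  // a PHI node placed at the front of AfterCallBB.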
  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code of the entry block into the calling block, right before
  // the unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  return true;
}
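
// A minimal driver sketch, not part of the original listing: roughly how a
// pass of this LLVM era invokes the routine above, assuming the 3.x-style
// entry point bool InlineFunction(CallSite, InlineFunctionInfo &). Header
// locations vary across releases (llvm/IR/CallSite.h vs.
// llvm/Support/CallSite.h).
#include "llvm/IR/CallSite.h"
#include "llvm/Transforms/Utils/Cloning.h"

static bool tryInlineCall(llvm::CallSite CS, llvm::InlineFunctionInfo &IFI) {
  // Only direct calls to a function with a body can be inlined.
  llvm::Function *Callee = CS.getCalledFunction();
  if (!Callee || Callee->isDeclaration())
    return false;
  // On success, InlineFunction (defined above) splices the callee's body into
  // the caller and erases the original call or invoke instruction.
  return llvm::InlineFunction(CS, IFI);
}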
Example #30
0
/// Iteratively perform simplification on a worklist of users
/// of the specified induction variable. Each successive simplification may push
/// more users which may themselves be candidates for simplification.
///
/// This algorithm does not require IVUsers analysis. Instead, it simplifies
/// instructions in-place during analysis. Rather than rewriting induction
/// variables bottom-up from their users, it transforms a chain of IVUsers
/// top-down, updating the IR only when it encounters a clear optimization
/// opportunity.
///
/// Once DisableIVRewrite is the default, LSR will be the only client of IVUsers.
///
void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
  if (!SE->isSCEVable(CurrIV->getType()))
    return;

  // Instructions processed by SimplifyIndvar for CurrIV.
  SmallPtrSet<Instruction*,16> Simplified;

  // Use-def pairs of IV users waiting to be processed for CurrIV.
  SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;

  // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
  // called multiple times for the same LoopPhi. This is the proper thing to
  // do for loop header phis that use each other.
  pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);

  while (!SimpleIVUsers.empty()) {
    std::pair<Instruction*, Instruction*> UseOper =
      SimpleIVUsers.pop_back_val();
    Instruction *UseInst = UseOper.first;

    // Bypass back edges to avoid extra work.
    if (UseInst == CurrIV) continue;

    // Try to replace UseInst with a loop invariant before any other
    // simplifications.
    if (replaceIVUserWithLoopInvariant(UseInst))
      continue;

    Instruction *IVOperand = UseOper.second;
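    // Repeatedly fold the IV operand into its user while folding still makes
    // progress; the loop counter only asserts that the chain of folds
    // terminates.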
    for (unsigned N = 0; IVOperand; ++N) {
      assert(N <= Simplified.size() && "runaway iteration");

      Value *NewOper = foldIVUser(UseInst, IVOperand);
      if (!NewOper)
        break; // done folding
      IVOperand = dyn_cast<Instruction>(NewOper);
    }
    if (!IVOperand)
      continue;

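    // Try to delete the user outright (e.g. a redundant IV comparison or
    // remainder); if it dies, its IV operand may gain new simplifiable users.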
    if (eliminateIVUser(UseInst, IVOperand)) {
      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
      continue;
    }

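    // For binary operators driven by the IV, try to strengthen the
    // instruction in place, e.g. by proving nsw/nuw flags.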
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
      if ((isa<OverflowingBinaryOperator>(BO) &&
           strengthenOverflowingOperation(BO, IVOperand)) ||
          (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
        // re-queue uses of the now modified binary operator and fall
        // through to the checks that remain.
        pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
      }
    }

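    // Defer casts to the client visitor (used, for instance, by IV widening)
    // rather than simplifying through them here.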
    CastInst *Cast = dyn_cast<CastInst>(UseInst);
    if (V && Cast) {
      V->visitCast(Cast);
      continue;
    }
    if (isSimpleIVUser(UseInst, L, SE)) {
      pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
    }
  }
}
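
// A minimal driver sketch, not part of the original listing: how this
// worklist routine is typically applied to a whole loop, modeled on the
// era's simplifyLoopIVs helper. The simplifyUsersOfIV signature is an
// assumption based on the surrounding 3.x API; header paths vary by release
// (e.g. WeakVH lived in llvm/Support/ValueHandle.h before moving under
// llvm/IR/).
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"

static bool simplifyAllLoopIVs(llvm::Loop *L, llvm::ScalarEvolution *SE,
                               llvm::LPPassManager *LPM,
                               llvm::SmallVectorImpl<llvm::WeakVH> &Dead) {
  bool Changed = false;
  // Every candidate IV is a phi in the loop header; run the top-down
  // worklist simplification above once per header phi.
  for (llvm::BasicBlock::iterator I = L->getHeader()->begin();
       llvm::isa<llvm::PHINode>(I); ++I)
    Changed |= llvm::simplifyUsersOfIV(llvm::cast<llvm::PHINode>(I), SE, LPM,
                                       Dead);
  return Changed;
}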