Code example #1
File: LoopUnroll.cpp Project: Blei/llvm-dcpu16
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecessary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available, it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple,
                      LoopInfo *LI, LPPassManager *LPM) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  // Don't enter the unroll code if there is nothing to do. This way we don't
  // need to support "partial unrolling by 1".
  if (TripCount == 0 && Count < 2)
    return false;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  // LPM may be null (see above), so guard the analysis lookup.
  ScalarEvolution *SE =
      LPM ? LPM->getAnalysisIfAvailable<ScalarEvolution>() : NULL;
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (LPM)
    if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
      DT->runOnFunction(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, LPM, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  return true;
}
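As a usage note: below is a minimal sketch of how a pass might drive UnrollLoop with the signature shown above. The helper name and the constants are hypothetical; TripCount and TripMultiple would normally come from ScalarEvolution and are simply assumed known here.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

// Hypothetical helper: partially unroll L by a factor of 4, assuming a
// known trip count of 8. Count is clamped to TripCount inside UnrollLoop.
static bool unrollByFour(Loop *L, LoopInfo *LI, LPPassManager *LPM) {
  return UnrollLoop(L, /*Count=*/4, /*TripCount=*/8,
                    /*AllowRuntime=*/false, /*TripMultiple=*/1, LI, LPM);
}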
Code example #2
/// \brief Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
                                             llvm::Loop *L, BranchInst *Term) {

  // Only search through PHI nodes which are inside the loop.  If we try this
  // with PHI nodes that are outside of the loop, we end up inserting new PHI
  // nodes outside of the loop which depend on values defined inside the loop.
  // This will break the module with
  // 'Instruction does not dominate all users!' errors.
  PHINode *Phi = nullptr;
  if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {

    BasicBlock *Parent = Phi->getParent();
    PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
    Value *Ret = NewPhi;

    // Handle all non-constant incoming values first
    for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
      Value *Incoming = Phi->getIncomingValue(i);
      BasicBlock *From = Phi->getIncomingBlock(i);
      if (isa<ConstantInt>(Incoming)) {
        NewPhi->addIncoming(Broken, From);
        continue;
      }

      Phi->setIncomingValue(i, BoolFalse);
      Value *PhiArg = handleLoopCondition(Incoming, Broken, L, Term);
      NewPhi->addIncoming(PhiArg, From);
    }

    BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();

    for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {

      Value *Incoming = Phi->getIncomingValue(i);
      if (Incoming != BoolTrue)
        continue;

      BasicBlock *From = Phi->getIncomingBlock(i);
      if (From == IDom) {
        // We're in the following situation:
        //   IDom/From
        //      |   \
        //      |   If-block
        //      |   /
        //     Parent
        // where we want to break out of the loop if the If-block is not taken.
        // Due to the depth-first traversal, there should be an end.cf
        // intrinsic in Parent, and we insert an else.break before it.
        //
        // Note that the end.cf need not be the first non-phi instruction
        // of parent, particularly when we're dealing with a multi-level
        // break, but it should occur within a group of intrinsic calls
        // at the beginning of the block.
        CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
        while (OldEnd && OldEnd->getCalledFunction() != EndCf)
          OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode());
        if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
          Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
          Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
          continue;
        }
      }
      TerminatorInst *Insert = From->getTerminator();
      Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
      NewPhi->setIncomingValue(i, PhiArg);
    }
    eraseIfUnused(Phi);
    return Ret;

  } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
    BasicBlock *Parent = Inst->getParent();
    Instruction *Insert;
    if (L->contains(Inst)) {
      Insert = Parent->getTerminator();
    } else {
      Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
    }
    Value *Args[] = { Cond, Broken };
    return CallInst::Create(IfBreak, Args, "", Insert);

  // Insert IfBreak before TERM for constant COND.
  } else if (isa<ConstantInt>(Cond)) {
    Value *Args[] = { Cond, Broken };
    return CallInst::Create(IfBreak, Args, "", Term);

  } else {
    llvm_unreachable("Unhandled loop condition!");
  }
  return nullptr;
}
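The branching above is a dyn_cast dispatch over the kind of Cond. A minimal standalone sketch of just that idiom follows, with a hypothetical helper name (header paths assume a post-3.3 LLVM tree):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Mirrors the dispatch order above: a PHI contained in the loop is the
// recursive case; other instructions (including out-of-loop PHIs) and
// constants are handled by the later branches.
static const char *classifyLoopCondition(Value *Cond, Loop *L) {
  if (PHINode *Phi = dyn_cast<PHINode>(Cond))
    if (L->contains(Phi))
      return "phi inside loop";
  if (isa<Instruction>(Cond))
    return "instruction";
  if (isa<ConstantInt>(Cond))
    return "constant";
  return "unhandled";
}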
Code example #3
File: SjLjEHPrepare.cpp Project: CTSRD-CHERI/llvm
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
  SmallVector<ReturnInst *, 16> Returns;
  SmallVector<InvokeInst *, 16> Invokes;
  SmallSetVector<LandingPadInst *, 16> LPads;

  // Look through the terminators of the basic blocks to find invokes.
  for (BasicBlock &BB : F)
    if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
      if (Function *Callee = II->getCalledFunction())
        if (Callee->getIntrinsicID() == Intrinsic::donothing) {
          // Remove the NOP invoke.
          BranchInst::Create(II->getNormalDest(), II);
          II->eraseFromParent();
          continue;
        }

      Invokes.push_back(II);
      LPads.insert(II->getUnwindDest()->getLandingPadInst());
    } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
      Returns.push_back(RI);
    }

  if (Invokes.empty())
    return false;

  NumInvokes += Invokes.size();

  lowerIncomingArguments(F);
  lowerAcrossUnwindEdges(F, Invokes);

  Value *FuncCtx =
      setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
  BasicBlock *EntryBB = &F.front();
  IRBuilder<> Builder(EntryBB->getTerminator());

  // Get a reference to the jump buffer.
  Value *JBufPtr =
      Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");

  // Save the frame pointer.
  Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
                                               "jbuf_fp_gep");

  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);

  // Save the stack pointer.
  Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
                                               "jbuf_sp_gep");

  Val = Builder.CreateCall(StackAddrFn, {}, "sp");
  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);

  // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
  Builder.CreateCall(BuiltinSetupDispatchFn, {});

  // Store a pointer to the function context so that the back-end will know
  // where to look for it.
  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
  Builder.CreateCall(FuncCtxFn, FuncCtxArg);

  // At this point, we are all set up, update the invoke instructions to mark
  // their call_site values.
  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
    insertCallSiteStore(Invokes[I], I + 1);

    ConstantInt *CallSiteNum =
        ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);

    // Record the call site value for the back end so it stays associated with
    // the invoke.
    CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
  }

  // Mark call instructions that aren't nounwind as no-action (call_site ==
  // -1). Skip the entry block, as prior to then, no function context has been
  // created for this function and any unexpected exceptions thrown will go
  // directly to the caller's context, which is what we want anyway, so no need
  // to do anything here.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB)
      if (I.mayThrow())
        insertCallSiteStore(&I, -1);
  }

  // Register the function context and make sure it's known to not throw
  CallInst *Register =
      CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
  Register->setDoesNotThrow();

  // Following any allocas not in the entry block, update the saved SP in the
  // jmpbuf to the new value.
  for (BasicBlock &BB : F) {
    if (&BB == &F.front())
      continue;
    for (Instruction &I : BB) {
      if (auto *CI = dyn_cast<CallInst>(&I)) {
        if (CI->getCalledFunction() != StackRestoreFn)
          continue;
      } else if (!isa<AllocaInst>(&I)) {
        continue;
      }
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(&I);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }
  }

  // Finally, for any returns from this function, if this function contains an
  // invoke, add a call to unregister the function context.
  for (ReturnInst *Return : Returns)
    CallInst::Create(UnregisterFn, FuncCtx, "", Return);

  return true;
}
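A minimal sketch of the terminator scan that opens this function, pulled out as a hypothetical standalone helper; it relies on the fact that only a block's terminator can be an invoke or a return.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: gather invoke and return terminators of F with one
// dyn_cast per basic block.
static void collectInvokesAndReturns(Function &F,
                                     SmallVectorImpl<InvokeInst *> &Invokes,
                                     SmallVectorImpl<ReturnInst *> &Returns) {
  for (BasicBlock &BB : F) {
    if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator()))
      Invokes.push_back(II);
    else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
      Returns.push_back(RI);
  }
}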
Code example #4
File: ARMIREmitter.cpp Project: SAB2012/fracture
// Note: branch conditions, by definition, only have a chain user.
// This is why the result is not saved in a map for recall.
Value* ARMIREmitter::visitBRCOND(const SDNode *N) {
  // Get the address
  const ConstantSDNode *DestNode = dyn_cast<ConstantSDNode>(N->getOperand(0));
  if (!DestNode) {
    printError("visitBRCOND: Not a constant integer for branch!");
    return NULL;
  }

  uint64_t DestInt = DestNode->getSExtValue();
  uint64_t PC = Dec->getDisassembler()->getDebugOffset(N->getDebugLoc());
  // Note: pipeline is 8 bytes
  uint64_t Tgt = PC + 8 + DestInt;

  Function *F = IRB->GetInsertBlock()->getParent();
  BasicBlock *CurBB = IRB->GetInsertBlock();

  BasicBlock *BBTgt = Dec->getOrCreateBasicBlock(Tgt, F);

  // Parse the branch condition code
  const ConstantSDNode *CCNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CCNode) {
    printError("visitBRCOND: Condition code is not a constant integer!");
    return NULL;
  }
  ARMCC::CondCodes ARMcc = ARMCC::CondCodes(CCNode->getZExtValue());

  // Unconditional branch
  if (ARMcc == ARMCC::AL) {
    Instruction *Br = IRB->CreateBr(BBTgt);
    Br->setDebugLoc(N->getDebugLoc());
    return Br;
  }

  // Otherwise the branch is conditional: find the fall-through successor
  // block, then build the condition from CC
  BasicBlock *NextBB = NULL;
  Function::iterator BI = F->begin(), BE= F->end();
  while (BI != BE && BI->getName() != CurBB->getName()) ++BI;
  ++BI;
  if (BI == BE) {               // NOTE: This should never happen...
    NextBB = Dec->getOrCreateBasicBlock("end", F);
  } else {
    NextBB = &(*BI);
  }


  SDNode *CPSR = N->getOperand(2)->getOperand(1).getNode();
  SDNode *CMPNode = NULL;
  for (SDNode::use_iterator I = CPSR->use_begin(), E = CPSR->use_end(); I != E;
       ++I) {
    if (I->getOpcode() == ISD::CopyToReg) {
      CMPNode = I->getOperand(2).getNode();
    }
  }

  if (CMPNode == NULL) {
    errs() << "ARMIREmitter ERROR: Could not find CMP SDNode for ARMBRCond!\n";
    return NULL;
  }

  Value *Cmp = NULL;
  Value *LHS = visit(CMPNode->getOperand(0).getNode());
  Value *RHS = visit(CMPNode->getOperand(1).getNode());
  // See ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC); in ARMISelLowering.cpp
  // TODO: Add support for conditions that handle floating point
  switch(ARMcc) {
    default:
      printError("Unknown condition code");
      return NULL;
    case ARMCC::EQ:
      Cmp = IRB->CreateICmpEQ(LHS, RHS);
      break;
    case ARMCC::NE:
      Cmp = IRB->CreateICmpNE(LHS, RHS);
      break;
    case ARMCC::HS:
      // HS - unsigned higher or same (or carry set)
      Cmp = IRB->CreateICmpUGE(LHS, RHS);
      break;
    case ARMCC::LO:
      // LO - unsigned lower (or carry clear)
      Cmp = IRB->CreateICmpULT(LHS, RHS);
      break;
    case ARMCC::MI:
      // MI - minus (negative)
      printError("Condition code MI is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::PL:
      // PL - plus (positive or zero)
      printError("Condition code PL is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::VS:
      // VS - V Set (signed overflow)
      printError("Condition code VS is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::VC:
      // VC - V clear (no signed overflow)
      printError("Condition code VC is not handled at this time!");
      return NULL;
      // break;
    case ARMCC::HI:
      // HI - unsigned higher
      Cmp = IRB->CreateICmpUGT(LHS, RHS);
      break;
    case ARMCC::LS:
      // LS - unsigned lower or same
      Cmp = IRB->CreateICmpULE(LHS, RHS);
      break;
    case ARMCC::GE:
      // GE - signed greater or equal
      Cmp = IRB->CreateICmpSGE(LHS, RHS);
      break;
    case ARMCC::LT:
      // LT - signed less than
      Cmp = IRB->CreateICmpSLT(LHS, RHS);
      break;
    case ARMCC::GT:
      // GT - signed greater than
      Cmp = IRB->CreateICmpSGT(LHS, RHS);
      break;
    case ARMCC::LE:
      // LE - signed less than or equal
      Cmp = IRB->CreateICmpSLE(LHS, RHS);
      break;
  }
  cast<Instruction>(Cmp)->setDebugLoc(N->getOperand(2)->getDebugLoc());

  // Conditional branch
  Instruction *Br = IRB->CreateCondBr(Cmp, BBTgt, NextBB);
  Br->setDebugLoc(N->getDebugLoc());
  return Br;
}
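The switch above is essentially a lookup table from ARM condition codes to integer-compare predicates. Below is a condensed sketch of that mapping; ARMCC::CondCodes lives in a target-internal header, so a local stand-in enum is used here.

#include "llvm/IR/InstrTypes.h"

// Local stand-in for ARMCC::CondCodes, restricted to the codes the switch
// above actually handles.
enum class ARMCond { EQ, NE, HS, LO, HI, LS, GE, LT, GT, LE };

static llvm::CmpInst::Predicate toICmpPredicate(ARMCond CC) {
  using llvm::CmpInst;
  switch (CC) {
  case ARMCond::EQ: return CmpInst::ICMP_EQ;
  case ARMCond::NE: return CmpInst::ICMP_NE;
  case ARMCond::HS: return CmpInst::ICMP_UGE; // unsigned higher or same
  case ARMCond::LO: return CmpInst::ICMP_ULT; // unsigned lower
  case ARMCond::HI: return CmpInst::ICMP_UGT; // unsigned higher
  case ARMCond::LS: return CmpInst::ICMP_ULE; // unsigned lower or same
  case ARMCond::GE: return CmpInst::ICMP_SGE; // signed greater or equal
  case ARMCond::LT: return CmpInst::ICMP_SLT; // signed less than
  case ARMCond::GT: return CmpInst::ICMP_SGT; // signed greater than
  case ARMCond::LE: return CmpInst::ICMP_SLE; // signed less or equal
  }
  // MI/PL/VS/VC have no single integer-compare analogue, which is why the
  // original bails out on them.
  return CmpInst::BAD_ICMP_PREDICATE;
}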
Code example #5
/// NormalizeLandingPads - Normalize and discover landing pads, noting them
/// in the LandingPads set.  A landing pad is normal if the only CFG edges
/// that end at it are unwind edges from invoke instructions. If we inlined
/// through an invoke we could have a normal branch from the previous
/// unwind block through to the landing pad for the original invoke.
/// Abnormal landing pads are fixed up by redirecting all unwind edges to
/// a new basic block which falls through to the original.
bool DwarfEHPrepare::NormalizeLandingPads() {
  bool Changed = false;

  const MCAsmInfo *MAI = TM->getMCAsmInfo();
  bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;

  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
    TerminatorInst *TI = I->getTerminator();
    if (!isa<InvokeInst>(TI))
      continue;
    BasicBlock *LPad = TI->getSuccessor(1);
    // Skip landing pads that have already been normalized.
    if (LandingPads.count(LPad))
      continue;

    // Check that only invoke unwind edges end at the landing pad.
    bool OnlyUnwoundTo = true;
    bool SwitchOK = usingSjLjEH;
    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
         PI != PE; ++PI) {
      TerminatorInst *PT = (*PI)->getTerminator();
      // The SjLj dispatch block uses a switch instruction. This is effectively
      // an unwind edge, so we can disregard it here. There will only ever
      // be one dispatch, however, so if there are multiple switches, one
      // of them truly is a normal edge, not an unwind edge.
      if (SwitchOK && isa<SwitchInst>(PT)) {
        SwitchOK = false;
        continue;
      }
      if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
        OnlyUnwoundTo = false;
        break;
      }
    }

    if (OnlyUnwoundTo) {
      // Only unwind edges lead to the landing pad.  Remember the landing pad.
      LandingPads.insert(LPad);
      continue;
    }

    // At least one normal edge ends at the landing pad.  Redirect the unwind
    // edges to a new basic block which falls through into this one.

    // Create the new basic block.
    BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
                                           LPad->getName() + "_unwind_edge");

    // Insert it into the function right before the original landing pad.
    LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);

    // Redirect unwind edges from the original landing pad to NewBB.
    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
      TerminatorInst *PT = (*PI++)->getTerminator();
      if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
        // Unwind to the new block.
        PT->setSuccessor(1, NewBB);
    }

    // If there are any PHI nodes in LPad, we need to update them so that they
    // merge incoming values from NewBB instead.
    for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
      PHINode *PN = cast<PHINode>(II);
      pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);

      // Check to see if all of the values coming in via unwind edges are the
      // same.  If so, we don't need to create a new PHI node.
      Value *InVal = PN->getIncomingValueForBlock(*PB);
      for (pred_iterator PI = PB; PI != PE; ++PI) {
        if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
          InVal = 0;
          break;
        }
      }

      if (InVal == 0) {
        // Different unwind edges have different values.  Create a new PHI node
        // in NewBB.
        PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind",
                                         NewBB);
        // Add an entry for each unwind edge, using the value from the old PHI.
        for (pred_iterator PI = PB; PI != PE; ++PI)
          NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);

        // Now use this new PHI as the common incoming value for NewBB in PN.
        InVal = NewPN;
      }

      // Revector exactly one entry in the PHI node to come from NewBB
      // and delete all other entries that come from unwind edges.  If
      // there are both normal and unwind edges from the same predecessor,
      // this leaves an entry for the normal edge.
      for (pred_iterator PI = PB; PI != PE; ++PI)
        PN->removeIncomingValue(*PI);
      PN->addIncoming(InVal, NewBB);
    }

    // Add a fallthrough from NewBB to the original landing pad.
    BranchInst::Create(LPad, NewBB);

    // Now update DominatorTree and DominanceFrontier analysis information.
    if (DT)
      DT->splitBlock(NewBB);
    if (DF)
      DF->splitBlock(NewBB);

    // Remember the newly constructed landing pad.  The original landing pad
    // LPad is no longer a landing pad now that all unwind edges have been
    // revectored to NewBB.
    LandingPads.insert(NewBB);
    ++NumLandingPadsSplit;
    Changed = true;
  }

  return Changed;
}
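The core of the normalization test reads well in isolation. A minimal sketch follows, assuming the TerminatorInst-era API used in this snippet and ignoring the SjLj switch special case handled above:

#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: true if every edge into LPad is an invoke's unwind
// edge; an invoke's normal destination (successor 0) or any other kind of
// terminator disqualifies the block.
static bool onlyUnwoundTo(BasicBlock *LPad) {
  for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
       PI != PE; ++PI) {
    TerminatorInst *PT = (*PI)->getTerminator();
    if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0))
      return false;
  }
  return true;
}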
Code example #6
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
  LoopInfo *LI = &getAnalysis<LoopInfo>();
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
  const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());

  SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // The bit we are stealing from the pointer represents whether this basic
  // block is the header of a subloop, in which case we only process its phis.
  typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
  SmallVector<WorklistItem, 16> VisitStack;
  SmallPtrSet<BasicBlock*, 32> Visited;

  bool Changed = false;
  bool LocalChanged;
  do {
    LocalChanged = false;

    VisitStack.clear();
    Visited.clear();

    VisitStack.push_back(WorklistItem(L->getHeader(), false));

    while (!VisitStack.empty()) {
      WorklistItem Item = VisitStack.pop_back_val();
      BasicBlock *BB = Item.getPointer();
      bool IsSubloopHeader = Item.getInt();

      // Simplify instructions in the current basic block.
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
        Instruction *I = BI++;

        // The first time through the loop ToSimplify is empty and we try to
        // simplify all instructions. On later iterations ToSimplify is not
        // empty and we only bother simplifying instructions that are in it.
        if (!ToSimplify->empty() && !ToSimplify->count(I))
          continue;

        // Don't bother simplifying unused instructions.
        if (!I->use_empty()) {
          Value *V = SimplifyInstruction(I, TD, TLI, DT);
          if (V && LI->replacementPreservesLCSSAForm(I, V)) {
            // Mark all uses for resimplification next time round the loop.
            for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
                 UI != UE; ++UI)
              Next->insert(cast<Instruction>(*UI));

            I->replaceAllUsesWith(V);
            LocalChanged = true;
            ++NumSimplified;
          }
        }
        LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);

        if (IsSubloopHeader && !isa<PHINode>(I))
          break;
      }

      // Add all successors to the worklist, except for loop exit blocks and the
      // bodies of subloops. We visit the headers of loops so that we can process
      // their phis, but we contract the rest of the subloop body and only follow
      // edges leading back to the original loop.
      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        BasicBlock *SuccBB = *SI;
        if (!Visited.insert(SuccBB))
          continue;

        const Loop *SuccLoop = LI->getLoopFor(SuccBB);
        if (SuccLoop && SuccLoop->getHeader() == SuccBB
                     && L->contains(SuccLoop)) {
          VisitStack.push_back(WorklistItem(SuccBB, true));

          SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
          SuccLoop->getExitBlocks(SubLoopExitBlocks);

          for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
            BasicBlock *ExitBB = SubLoopExitBlocks[i];
            if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
              VisitStack.push_back(WorklistItem(ExitBB, false));
          }

          continue;
        }

        bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
                                              ExitBlocks.end(), SuccBB);
        if (IsExitBlock)
          continue;

        VisitStack.push_back(WorklistItem(SuccBB, false));
      }
    }

    // Place the list of instructions to simplify on the next loop iteration
    // into ToSimplify.
    std::swap(ToSimplify, Next);
    Next->clear();

    Changed |= LocalChanged;
  } while (LocalChanged);

  return Changed;
}
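The worklist above steals one bit from each BasicBlock pointer rather than carrying a separate flag. A minimal sketch of the PointerIntPair round-trip, with hypothetical names:

#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"

using namespace llvm;

typedef PointerIntPair<BasicBlock *, 1> WorklistItem;

// The flag travels in the pointer's low (alignment) bit, so the worklist
// stays a flat vector of pointer-sized items.
static void demo(SmallVectorImpl<WorklistItem> &Stack, BasicBlock *BB) {
  Stack.push_back(WorklistItem(BB, /*IsSubloopHeader=*/true));
  WorklistItem Item = Stack.pop_back_val();
  bool IsSubloopHeader = Item.getInt();  // recovers the stolen bit
  BasicBlock *Same = Item.getPointer();  // recovers the original pointer
  (void)IsSubloopHeader;
  (void)Same;
}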
Code example #7
/// Create a clone of the blocks in a loop and connect them together.
/// If UnrollProlog is true, the loop structure is not cloned; otherwise a new
/// loop is created that includes all cloned blocks, and its induction
/// variable counts NewIter down to 0.
///
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
                            BasicBlock *InsertTop, BasicBlock *InsertBot,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                            LoopInfo *LI) {
  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *NewLoop = nullptr;
  Loop *ParentLoop = L->getParentLoop();
  if (!UnrollProlog) {
    NewLoop = new Loop();
    if (ParentLoop)
      ParentLoop->addChildLoop(NewLoop);
    else
      LI->addTopLevelLoop(NewLoop);
  }

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
    NewBlocks.push_back(NewBB);

    if (NewLoop)
      NewLoop->addBasicBlockToLoop(NewBB, *LI);
    else if (ParentLoop)
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);

    }
    if (Latch == *BB) {
      // For the last block, if UnrollProlog is true, create a direct jump to
      // InsertBot. If not, create a loop back to cloned head.
      VMap.erase((*BB)->getTerminator());
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      IRBuilder<> Builder(LatchBR);
      if (UnrollProlog) {
        Builder.CreateBr(InsertBot);
      } else {
        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
                                          FirstLoopBB->getFirstNonPHI());
        Value *IdxSub =
            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                              NewIdx->getName() + ".sub");
        Value *IdxCmp =
            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
        NewIdx->addIncoming(NewIter, InsertTop);
        NewIdx->addIncoming(IdxSub, NewBB);
      }
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (UnrollProlog) {
      VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
      cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
    } else {
      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
      NewPHI->setIncomingBlock(idx, InsertTop);
      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
      idx = NewPHI->getBasicBlockIndex(Latch);
      Value *InVal = NewPHI->getIncomingValue(idx);
      NewPHI->setIncomingBlock(idx, NewLatch);
      if (VMap[InVal])
        NewPHI->setIncomingValue(idx, VMap[InVal]);
    }
  }
  if (NewLoop) {
    // Add unroll disable metadata to disable future unrolling for this loop.
    SmallVector<Metadata *, 4> MDs;
    // Reserve first location for self reference to the LoopID metadata node.
    MDs.push_back(nullptr);
    MDNode *LoopID = NewLoop->getLoopID();
    if (LoopID) {
      // First remove any existing loop unrolling metadata.
      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
        bool IsUnrollMetadata = false;
        MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
        if (MD) {
          const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
          IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
        }
        if (!IsUnrollMetadata)
          MDs.push_back(LoopID->getOperand(i));
      }
    }

    LLVMContext &Context = NewLoop->getHeader()->getContext();
    SmallVector<Metadata *, 1> DisableOperands;
    DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
    MDNode *DisableNode = MDNode::get(Context, DisableOperands);
    MDs.push_back(DisableNode);

    MDNode *NewLoopID = MDNode::get(Context, MDs);
    // Set operand 0 to refer to the loop id itself.
    NewLoopID->replaceOperandWith(0, NewLoopID);
    NewLoop->setLoopID(NewLoopID);
  }
}
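The cloned prolog replaces the original induction variable with a counter that runs NewIter down to zero. A minimal sketch of just that latch construction, with illustrative names:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical helper mirroring the builder calls above: emit
// idx.sub = idx - 1 and loop back while idx.sub is non-zero.
static void buildCountdownLatch(IRBuilder<> &Builder, PHINode *Idx,
                                BasicBlock *LoopBody, BasicBlock *Exit) {
  Value *Sub = Builder.CreateSub(Idx, ConstantInt::get(Idx->getType(), 1),
                                 Idx->getName() + ".sub");
  Value *Cmp = Builder.CreateIsNotNull(Sub, Idx->getName() + ".cmp");
  Builder.CreateCondBr(Cmp, LoopBody, Exit);
  Idx->addIncoming(Sub, Builder.GetInsertBlock());
}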
Code example #8
File: LoopUnswitch.cpp Project: RCSL-HKUST/heterosim
/// UnswitchNontrivialCondition - We determined that the loop is profitable
/// to unswitch when LIC equals Val.  Split it into loop versions and test the
/// condition outside of either loop.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, 
                                               Loop *L) {
  Function *F = loopHeader->getParent();
  DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
        << " blocks] in Function " << F->getName()
        << " when '" << *Val << "' == " << *LIC << "\n");

  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  LoopBlocks.clear();
  NewBlocks.clear();

  // First step, split the preheader and exit blocks, and add these blocks to
  // the LoopBlocks list.
  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
  LoopBlocks.push_back(NewPreheader);

  // We want the loop to come after the preheader, but before the exit blocks.
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Split all of the edges from inside the loop to their exit blocks.  Update
  // the appropriate Phi nodes as we do so.
  SplitExitEdges(L, ExitBlocks);

  // The exit blocks may have been changed due to edge splitting, recompute.
  ExitBlocks.clear();
  L->getUniqueExitBlocks(ExitBlocks);

  // Add exit blocks to the loop blocks.
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());

  // Next step, clone all of the basic blocks that make up the loop (including
  // the loop preheader and exit blocks), keeping track of the mapping between
  // the instructions and blocks.
  NewBlocks.reserve(LoopBlocks.size());
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
  }

  // Splice the newly inserted blocks into the function right before the
  // original preheader.
  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                NewBlocks[0], F->end());

  // Now we create the new Loop object for the versioned loop.
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
  Loop *ParentLoop = L->getParentLoop();
  if (ParentLoop) {
    // Make sure to add the cloned preheader and exit blocks to the parent loop
    // as well.
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
  }
  
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
    // The new exit block should be in the same loop as the old one.
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
    
    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);

    // If the successor of the exit block had PHI nodes, add an entry for
    // NewExit.
    PHINode *PN;
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PN = cast<PHINode>(I);
      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
      ValueToValueMapTy::iterator It = VMap.find(V);
      if (It != VMap.end()) V = It->second;
      PN->addIncoming(V, NewExit);
    }
  }

  // Rewrite the code to refer to itself.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
      RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
  
  // Rewrite the original preheader to select between versions of the loop.
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
         "Preheader splitting did not work correctly!");

  // Emit the new branch that selects between the two versions of this loop.
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
  LPM->deleteSimpleAnalysisValue(OldBR, L);
  OldBR->eraseFromParent();

  LoopProcessWorklist.push_back(NewLoop);
  redoLoop = true;

  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody
  // deletes the instruction (for example by simplifying a PHI that feeds into
  // the condition that we're unswitching on), we don't rewrite the second
  // iteration.
  WeakVH LICHandle(LIC);
  
  // Now we rewrite the original code to know that the condition is true and the
  // new code to know that the condition is false.
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);

  // It's possible that simplifying one loop could cause the other to be
  // changed to another value or a constant.  If its a constant, don't simplify
  // it.
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
      LICHandle && !isa<Constant>(LICHandle))
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
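Both this example and example #1 use the same clone-then-remap pattern: CloneBasicBlock fills VMap with old-to-new mappings, and RemapInstruction later rewrites the clones' operands through that map. A minimal sketch, assuming the same era's iterator conversions and flag names:

#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Hypothetical helper: clone BB into F, then rewrite the clone's operands
// to refer to cloned values; unmapped (loop-external) operands are kept.
static BasicBlock *cloneAndRemap(BasicBlock *BB, Function *F,
                                 ValueToValueMapTy &VMap) {
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".us", F);
  VMap[BB] = NewBB; // map the block itself, as the original does
  for (BasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); I != E; ++I)
    RemapInstruction(I, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
  return NewBB;
}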
Code example #9
File: LoopUnswitch.cpp Project: RCSL-HKUST/heterosim
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value.  Rewrite any uses of LIC or of properties correlated to it.
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
                                                        Constant *Val,
                                                        bool IsEqual) {
  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
  
  // FIXME: Support correlated properties, like:
  //  for (...)
  //    if (li1 < li2)
  //      ...
  //    if (li1 > li2)
  //      ...
  
  // FOLD boolean conditions (X|LIC), (X&LIC).  Fold conditional branches,
  // selects, switches.
  std::vector<Instruction*> Worklist;
  LLVMContext &Context = Val->getContext();


  // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
  // in the loop with the appropriate one directly.
  if (IsEqual || (isa<ConstantInt>(Val) &&
      Val->getType()->isIntegerTy(1))) {
    Value *Replacement;
    if (IsEqual)
      Replacement = Val;
    else
      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), 
                                     !cast<ConstantInt>(Val)->getZExtValue());
    
    for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
         UI != E; ++UI) {
      Instruction *U = dyn_cast<Instruction>(*UI);
      if (!U || !L->contains(U))
        continue;
      U->replaceUsesOfWith(LIC, Replacement);
      Worklist.push_back(U);
    }
    SimplifyCode(Worklist, L);
    return;
  }
  
  // Otherwise, we don't know the precise value of LIC, but we do know that it
  // is certainly NOT "Val".  As such, simplify any uses in the loop that we
  // can.  This case occurs when we unswitch switch statements.
  for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
       UI != E; ++UI) {
    Instruction *U = dyn_cast<Instruction>(*UI);
    if (!U || !L->contains(U))
      continue;

    Worklist.push_back(U);

    // TODO: We could do other simplifications, for example, turning 
    // 'icmp eq LIC, Val' -> false.

    // If we know that LIC is not Val, use this info to simplify code.
    SwitchInst *SI = dyn_cast<SwitchInst>(U);
    if (SI == 0 || !isa<ConstantInt>(Val)) continue;
    
    unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
    if (DeadCase == 0) continue;  // Default case is live for multiple values.
    
    // Found a dead case value.  Don't remove PHI nodes in the 
    // successor if they become single-entry, those PHI nodes may
    // be in the Users list.

    BasicBlock *Switch = SI->getParent();
    BasicBlock *SISucc = SI->getSuccessor(DeadCase);
    BasicBlock *Latch = L->getLoopLatch();
    if (!SI->findCaseDest(SISucc)) continue;  // Edge is critical.
    // If the DeadCase successor dominates the loop latch, then the
    // transformation isn't safe since it will delete the sole predecessor edge
    // to the latch.
    if (Latch && DT->dominates(SISucc, Latch))
      continue;

    // FIXME: This is a hack.  We need to keep the successor around
    // and hooked up so as to preserve the loop structure, because
    // trying to update it is complicated.  So instead we preserve the
    // loop structure and put the block on a dead code path.
    SplitEdge(Switch, SISucc, this);
    // Compute the successors instead of relying on the return value
    // of SplitEdge, since it may have split the switch successor
    // after PHI nodes.
    BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
    BasicBlock *OldSISucc = *succ_begin(NewSISucc);
    // Create an "unreachable" destination.
    BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
                                           Switch->getParent(),
                                           OldSISucc);
    new UnreachableInst(Context, Abort);
    // Force the new case destination to branch to the "unreachable"
    // block while maintaining a (dead) CFG edge to the old block.
    NewSISucc->getTerminator()->eraseFromParent();
    BranchInst::Create(Abort, OldSISucc,
                       ConstantInt::getTrue(Context), NewSISucc);
    // Release the PHI operands for this edge.
    for (BasicBlock::iterator II = NewSISucc->begin();
         PHINode *PN = dyn_cast<PHINode>(II); ++II)
      PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
                           UndefValue::get(PN->getType()));
    // Tell the domtree about the new block. We don't fully update the
    // domtree here -- instead we force it to do a full recomputation
    // after the pass is complete -- but we do need to inform it of
    // new blocks.
    if (DT)
      DT->addNewBlock(Abort, NewSISucc);
  }
  
  SimplifyCode(Worklist, L);
}
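The first branch above boils down to choosing a replacement constant. A minimal sketch of just that step, as a hypothetical helper: Val itself when LIC == Val is known, otherwise the negated i1 constant.

#include "llvm/IR/Constants.h"

using namespace llvm;

// Mirrors the Replacement computation above; only meaningful when Val is
// an i1 ConstantInt in the !IsEqual case.
static Value *pickReplacement(Constant *Val, bool IsEqual) {
  if (IsEqual)
    return Val;
  return ConstantInt::get(Type::getInt1Ty(Val->getContext()),
                          !cast<ConstantInt>(Val)->getZExtValue());
}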
Code example #10
void PromoteMem2Reg::run() {
    Function &F = *DF.getRoot()->getParent();

    if (AST) PointerAllocaValues.resize(Allocas.size());
    AllocaDbgDeclares.resize(Allocas.size());

    AllocaInfo Info;
    LargeBlockInfo LBI;

    for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
        AllocaInst *AI = Allocas[AllocaNum];

        assert(isAllocaPromotable(AI) &&
               "Cannot promote non-promotable alloca!");
        assert(AI->getParent()->getParent() == &F &&
               "All allocas should be in the same function, which is same as DF!");

        if (AI->use_empty()) {
            // If there are no uses of the alloca, just delete it now.
            if (AST) AST->deleteValue(AI);
            AI->eraseFromParent();

            // Remove the alloca from the Allocas list, since it has been processed
            RemoveFromAllocasList(AllocaNum);
            ++NumDeadAlloca;
            continue;
        }

        // Calculate the set of read and write-locations for each alloca.  This is
        // analogous to finding the 'uses' and 'definitions' of each variable.
        Info.AnalyzeAlloca(AI);

        // If there is only a single store to this value, replace any loads of
        // it that are directly dominated by the definition with the value stored.
        if (Info.DefiningBlocks.size() == 1) {
            RewriteSingleStoreAlloca(AI, Info, LBI);

            // Finally, after the scan, check to see if the store is all that is left.
            if (Info.UsingBlocks.empty()) {
                // Record debuginfo for the store and remove the declaration's debuginfo.
                if (DbgDeclareInst *DDI = Info.DbgDeclare) {
                    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
                    DDI->eraseFromParent();
                }
                // Remove the (now dead) store and alloca.
                Info.OnlyStore->eraseFromParent();
                LBI.deleteValue(Info.OnlyStore);

                if (AST) AST->deleteValue(AI);
                AI->eraseFromParent();
                LBI.deleteValue(AI);

                // The alloca has been processed, move on.
                RemoveFromAllocasList(AllocaNum);

                ++NumSingleStore;
                continue;
            }
        }

        // If the alloca is only read and written in one basic block, just perform a
        // linear sweep over the block to eliminate it.
        if (Info.OnlyUsedInOneBlock) {
            PromoteSingleBlockAlloca(AI, Info, LBI);

            // Finally, after the scan, check to see if the stores are all that is
            // left.
            if (Info.UsingBlocks.empty()) {

                // Remove the (now dead) stores and alloca.
                while (!AI->use_empty()) {
                    StoreInst *SI = cast<StoreInst>(AI->use_back());
                    // Record debuginfo for the store before removing it.
                    if (DbgDeclareInst *DDI = Info.DbgDeclare)
                        ConvertDebugDeclareToDebugValue(DDI, SI);
                    SI->eraseFromParent();
                    LBI.deleteValue(SI);
                }

                if (AST) AST->deleteValue(AI);
                AI->eraseFromParent();
                LBI.deleteValue(AI);

                // The alloca has been processed, move on.
                RemoveFromAllocasList(AllocaNum);

                // The alloca's debuginfo can be removed as well.
                if (DbgDeclareInst *DDI = Info.DbgDeclare)
                    DDI->eraseFromParent();

                ++NumLocalPromoted;
                continue;
            }
        }

        // If we haven't computed a numbering for the BB's in the function, do so
        // now.
        if (BBNumbers.empty()) {
            unsigned ID = 0;
            for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
                BBNumbers[I] = ID++;
        }

        // If we have an AST to keep updated, remember some pointer value that is
        // stored into the alloca.
        if (AST)
            PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

        // Remember the dbg.declare intrinsic describing this alloca, if any.
        if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

        // Keep the reverse mapping of the 'Allocas' array for the rename pass.
        AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

        // At this point, we're committed to promoting the alloca using IDF's, and
        // the standard SSA construction algorithm.  Determine which blocks need PHI
        // nodes and see if we can optimize out some work by avoiding insertion of
        // dead phi nodes.
        DetermineInsertionPoint(AI, AllocaNum, Info);
    }

    if (Allocas.empty())
        return; // All of the allocas must have been trivial!

    LBI.clear();


    // Set the incoming values for the basic block to be null values for all of
    // the alloca's.  We do this in case there is a load of a value that has not
    // been stored yet.  In this case, it will get this null value.
    //
    RenamePassData::ValVector Values(Allocas.size());
    for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
        Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

    // Walks all basic blocks in the function performing the SSA rename algorithm
    // and inserting the phi nodes we marked as necessary
    //
    std::vector<RenamePassData> RenamePassWorkList;
    RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
    do {
        RenamePassData RPD;
        RPD.swap(RenamePassWorkList.back());
        RenamePassWorkList.pop_back();
        // RenamePass may add new worklist entries.
        RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
    } while (!RenamePassWorkList.empty());

    // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
    Visited.clear();

    // Remove the allocas themselves from the function.
    for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
        Instruction *A = Allocas[i];

        // If there are any uses of the alloca instructions left, they must be in
        // sections of dead code that were not processed on the dominance frontier.
        // Just delete the users now.
        //
        if (!A->use_empty())
            A->replaceAllUsesWith(UndefValue::get(A->getType()));
        if (AST) AST->deleteValue(A);
        A->eraseFromParent();
    }

    // Remove the alloca's dbg.declare intrinsics from the function.
    for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
        if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
            DDI->eraseFromParent();

    // Loop over all of the PHI nodes and see if there are any that we can get
    // rid of because they merge all of the same incoming values.  This can
    // happen due to undef values coming into the PHI nodes.  This process is
    // iterative, because eliminating one PHI node can cause others to be removed.
    bool EliminatedAPHI = true;
    while (EliminatedAPHI) {
        EliminatedAPHI = false;

        for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
                    NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
            PHINode *PN = I->second;

            // If this PHI node merges one value and/or undefs, get the value.
            if (Value *V = PN->hasConstantValue(&DT)) {
                if (AST && PN->getType()->isPointerTy())
                    AST->deleteValue(PN);
                PN->replaceAllUsesWith(V);
                PN->eraseFromParent();
                NewPhiNodes.erase(I++);
                EliminatedAPHI = true;
                continue;
            }
            ++I;
        }
    }

    // At this point, the renamer has added entries to PHI nodes for all reachable
    // code.  Unfortunately, there may be unreachable blocks which the renamer
    // hasn't traversed.  If this is the case, the PHI nodes may not
    // have incoming values for all predecessors.  Loop over all PHI nodes we have
    // created, inserting undef values if they are missing any incoming values.
    //
    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
                NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
        // We want to do this once per basic block.  As such, only process a block
        // when we find the PHI that is the first entry in the block.
        PHINode *SomePHI = I->second;
        BasicBlock *BB = SomePHI->getParent();
        if (&BB->front() != SomePHI)
            continue;

        // Only do work here if the PHI nodes are missing incoming values.  We
        // know that all PHI nodes that were inserted in a block will have the same
        // number of incoming values, so we can just check any of them.
        if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
            continue;

        // Get the preds for BB.
        SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

        // Ok, now we know that all of the PHI nodes are missing entries for some
        // basic blocks.  Start by sorting the incoming predecessors for efficient
        // access.
        std::sort(Preds.begin(), Preds.end());

        // Now we loop through all BB's which have entries in SomePHI and remove
        // them from the Preds list.
        for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
            // Do a log(n) search of the Preds list for the entry we want.
            SmallVector<BasicBlock*, 16>::iterator EntIt =
                std::lower_bound(Preds.begin(), Preds.end(),
                                 SomePHI->getIncomingBlock(i));
            assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
                   "PHI node has entry for a block which is not a predecessor!");

            // Remove the entry
            Preds.erase(EntIt);
        }

        // At this point, the blocks left in the preds list must get dummy
        // entries inserted into every PHI node in the block.  Update all the phi
        // nodes in this block that we are inserting (there could be phis before
        // mem2reg runs).
        unsigned NumBadPreds = SomePHI->getNumIncomingValues();
        BasicBlock::iterator BBI = BB->begin();
        while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
                SomePHI->getNumIncomingValues() == NumBadPreds) {
            Value *UndefVal = UndefValue::get(SomePHI->getType());
            for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
                SomePHI->addIncoming(UndefVal, Preds[pred]);
        }
    }

    NewPhiNodes.clear();
}
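For context, here is a minimal sketch of how a pass might drive this promotion, assuming the classic PromoteMemToReg entry point from llvm/Transforms/Utils/PromoteMemToReg.h (the exact signature varies across LLVM versions, and promoteEntryBlockAllocas is a hypothetical helper, not part of the code above):

#include <vector>
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

static bool promoteEntryBlockAllocas(Function &F, DominatorTree &DT) {
  std::vector<AllocaInst *> Allocas;
  // Promotable allocas are conventionally created in the entry block.
  for (Instruction &I : F.getEntryBlock())
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
      if (isAllocaPromotable(AI)) // only direct loads/stores, no escapes
        Allocas.push_back(AI);
  if (Allocas.empty())
    return false;
  PromoteMemToReg(Allocas, DT); // runs the SSA construction shown above
  return true;
}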
Code Example #11
/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

    // To determine liveness, we must iterate through the predecessors of blocks
    // where the def is live.  Blocks are added to the worklist if we need to
    // check their predecessors.  Start with all the using blocks.
    SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
    LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
                               Info.UsingBlocks.begin(), Info.UsingBlocks.end());

    // If any of the using blocks is also a definition block, check to see if the
    // definition occurs before or after the use.  If it happens before the use,
    // the value isn't really live-in.
    for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
        BasicBlock *BB = LiveInBlockWorklist[i];
        if (!DefBlocks.count(BB)) continue;

        // Okay, this is a block that both uses and defines the value.  If the first
        // reference to the alloca is a def (store), then we know it isn't live-in.
        for (BasicBlock::iterator I = BB->begin(); ; ++I) {
            if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
                if (SI->getOperand(1) != AI) continue;

                // We found a store to the alloca before a load.  The alloca is not
                // actually live-in here.
                LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
                LiveInBlockWorklist.pop_back();
                --i, --e;
                break;
            }

            if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
                if (LI->getOperand(0) != AI) continue;

                // Okay, we found a load before a store to the alloca.  It is actually
                // live into this block.
                break;
            }
        }
    }

    // Now that we have a set of blocks where the phi is live-in, recursively add
    // their predecessors until we find the full region the value is live in.
    while (!LiveInBlockWorklist.empty()) {
        BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

        // The block really is live in here, insert it into the set.  If already in
        // the set, then it has already been processed.
        if (!LiveInBlocks.insert(BB))
            continue;

        // Since the value is live into BB, it is either defined in a predecessor or
        // live into it too.  Add the preds to the worklist unless they are
        // defining blocks.
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
            BasicBlock *P = *PI;

            // The value is not live into a predecessor if it defines the value.
            if (DefBlocks.count(P))
                continue;

            // Otherwise it is, add to the worklist.
            LiveInBlockWorklist.push_back(P);
        }
    }
}
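To make the worklist idea concrete, here is the same live-in computation in isolation over a toy CFG (Block is a stand-in type, not the LLVM class): seed the worklist with the using blocks, then walk predecessors until a defining block stops the propagation.

#include <set>
#include <vector>

struct Block { std::vector<Block*> preds; };

static std::set<Block*> computeLiveIn(const std::set<Block*> &DefBlocks,
                                      std::vector<Block*> Worklist) {
  std::set<Block*> LiveIn;
  while (!Worklist.empty()) {
    Block *BB = Worklist.back();
    Worklist.pop_back();
    if (!LiveIn.insert(BB).second)
      continue;                      // already processed
    for (Block *P : BB->preds)
      if (!DefBlocks.count(P))       // a def in the pred kills liveness
        Worklist.push_back(P);
  }
  return LiveIn;
}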
Code Example #12
File: SanitizerCoverage.cpp Project: mars-rover/llvm
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
                                                    bool UseCalls) {
  // Don't insert coverage for unreachable blocks: we will never call
  // __sanitizer_cov() for them, so counting them in
  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
  // percentage. Also, unreachable instructions frequently have no debug
  // locations.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return;
  BasicBlock::iterator IP = BB.getFirstInsertionPt();

  bool IsEntryBB = &BB == &F.getEntryBlock();
  DebugLoc EntryLoc;
  if (IsEntryBB) {
    if (auto SP = getDISubprogram(&F))
      EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
    // Keep static allocas and llvm.localescape calls in the entry block.  Even
    // if we aren't splitting the block, it's nice for allocas to be before
    // calls.
    IP = PrepareToSplitEntryBlock(BB, IP);
  } else {
    EntryLoc = IP->getDebugLoc();
  }

  IRBuilder<> IRB(&*IP);
  IRB.SetCurrentDebugLocation(EntryLoc);
  Value *GuardP = IRB.CreateAdd(
      IRB.CreatePointerCast(GuardArray, IntptrTy),
      ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
  Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
  GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
  if (Options.TraceBB) {
    IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
  } else if (UseCalls) {
    IRB.CreateCall(SanCovWithCheckFunction, GuardP);
  } else {
    LoadInst *Load = IRB.CreateLoad(GuardP);
    Load->setAtomic(Monotonic);
    Load->setAlignment(4);
    SetNoSanitizeMetadata(Load);
    Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
    Instruction *Ins = SplitBlockAndInsertIfThen(
        Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
    IRB.SetInsertPoint(Ins);
    IRB.SetCurrentDebugLocation(EntryLoc);
    // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
    IRB.CreateCall(SanCovFunction, GuardP);
    IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
  }

  if (Options.Use8bitCounters) {
    IRB.SetInsertPoint(&*IP);
    Value *P = IRB.CreateAdd(
        IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
        ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
    P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
    LoadInst *LI = IRB.CreateLoad(P);
    Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
    StoreInst *SI = IRB.CreateStore(Inc, P);
    SetNoSanitizeMetadata(LI);
    SetNoSanitizeMetadata(SI);
  }
}
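Roughly, the non-call path above inlines the following fast-path check at every instrumented block (an illustrative C++ rendering; the extern symbols shown are assumptions standing in for the arrays and hooks the pass actually creates):

#include <cstdint>

extern "C" void __sanitizer_cov(int32_t *Guard); // assumed runtime hook
extern int32_t GuardArray[];                     // stands in for the pass's array

inline void coverageHook(unsigned BlockIndex) {
  int32_t *Guard = &GuardArray[BlockIndex]; // cf. GuardArray + 4 * (1 + N)
  // The 1:100000 branch weights above mark this branch as very rarely taken:
  // once the runtime has recorded the block, the guard goes positive.
  if (0 >= *Guard)          // matches the IR's "icmp sge 0, %load"
    __sanitizer_cov(Guard); // slow path; records the caller's PC
}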
Code Example #13
/// InsertStackProtectors - Insert code into the prologue and epilogue of the
/// function.
///
///  - The prologue code loads and stores the stack guard onto the stack.
///  - The epilogue checks the value stored in the prologue against the original
///    value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
  bool HasPrologue = false;
  bool SupportsSelectionDAGSP =
      EnableSelectionDAGSP && !TM->Options.EnableFastISel;
  AllocaInst *AI = nullptr;       // Place on stack that stores the stack guard.
  Value *StackGuardVar = nullptr; // The stack guard variable.

  for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
    BasicBlock *BB = &*I++;
    ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
    if (!RI)
      continue;

    if (!HasPrologue) {
      HasPrologue = true;
      SupportsSelectionDAGSP &=
          CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar);
    }

    if (SupportsSelectionDAGSP) {
      // Since we have a potential tail call, insert the special stack check
      // intrinsic.
      Instruction *InsertionPt = nullptr;
      if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) {
        InsertionPt = CI;
      } else {
        InsertionPt = RI;
        // At this point we know that BB has a return statement so it *DOES*
        // have a terminator.
        assert(InsertionPt != nullptr &&
               "BB must have a terminator instruction at this point.");
      }

      Function *Intrinsic =
          Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck);
      CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt);
    } else {
      // If we do not support SelectionDAG based tail calls, generate IR level
      // tail calls.
      //
      // For each block with a return instruction, convert this:
      //
      //   return:
      //     ...
      //     ret ...
      //
      // into this:
      //
      //   return:
      //     ...
      //     %1 = load __stack_chk_guard
      //     %2 = load StackGuardSlot
      //     %3 = cmp i1 %1, %2
      //     br i1 %3, label %SP_return, label %CallStackCheckFailBlk
      //
      //   SP_return:
      //     ret ...
      //
      //   CallStackCheckFailBlk:
      //     call void @__stack_chk_fail()
      //     unreachable

      // Create the FailBB.  We duplicate the BB every time since the MI tail
      // merge pass will merge all of the various BBs into one, including the
      // fail BB generated by the stack protector pseudo instruction.
      BasicBlock *FailBB = CreateFailBB();

      // Split the basic block before the return instruction.
      BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");

      // Update the dominator tree if we need to.
      if (DT && DT->isReachableFromEntry(BB)) {
        DT->addNewBlock(NewBB, BB);
        DT->addNewBlock(FailBB, BB);
      }

      // Remove default branch instruction to the new BB.
      BB->getTerminator()->eraseFromParent();

      // Move the newly created basic block to the point right after the old
      // basic block so that it's in the "fall through" position.
      NewBB->moveAfter(BB);

      // Generate the stack protector instructions in the old basic block.
      IRBuilder<> B(BB);
      LoadInst *LI1 = B.CreateLoad(StackGuardVar);
      LoadInst *LI2 = B.CreateLoad(AI);
      Value *Cmp = B.CreateICmpEQ(LI1, LI2);
      unsigned SuccessWeight =
          BranchProbabilityInfo::getBranchWeightStackProtector(true);
      unsigned FailureWeight =
          BranchProbabilityInfo::getBranchWeightStackProtector(false);
      MDNode *Weights = MDBuilder(F->getContext())
                            .createBranchWeights(SuccessWeight, FailureWeight);
      B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
    }
  }

  // Return false if we didn't modify any basic blocks, i.e., there are no
  // return statements in the function.
  return HasPrologue;
}
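In source-level terms, the IR-level else-branch above amounts to the following (an illustrative sketch; the ELF-style guard symbol and failure hook are assumptions about the target):

#include <cstdint>

extern "C" uintptr_t __stack_chk_guard;          // assumed guard symbol
extern "C" [[noreturn]] void __stack_chk_fail(); // assumed failure hook

int protectedFunction() {
  uintptr_t GuardSlot = __stack_chk_guard; // prologue: stash the guard
  int Result = 0;
  // ... body that could overflow a stack buffer ...
  if (GuardSlot != __stack_chk_guard)      // epilogue: compare before ret
    __stack_chk_fail();                    // mismatch => stack was smashed
  return Result;
}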
Code Example #14
File: IntegerDivision.cpp Project: Midas8181919/llvm
/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
/// quotient, rounded towards 0. Builder's insert point should be pointing where
/// the caller wants code generated, e.g. at the udiv instruction.
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
                                           IRBuilder<> &Builder) {
  // The basic algorithm can be found in the compiler-rt project's
  // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
  // that's been hand-tuned to lessen the amount of control flow involved.

  // Some helper values
  IntegerType *I32Ty = Builder.getInt32Ty();

  ConstantInt *Zero      = Builder.getInt32(0);
  ConstantInt *One       = Builder.getInt32(1);
  ConstantInt *ThirtyOne = Builder.getInt32(31);
  ConstantInt *NegOne    = ConstantInt::getSigned(I32Ty, -1);
  ConstantInt *True      = Builder.getTrue();

  BasicBlock *IBB = Builder.GetInsertBlock();
  Function *F = IBB->getParent();
  Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                                I32Ty);

  // Our CFG is going to look like:
  // +---------------------+
  // | special-cases       |
  // |   ...               |
  // +---------------------+
  //  |       |
  //  |   +----------+
  //  |   |  bb1     |
  //  |   |  ...     |
  //  |   +----------+
  //  |    |      |
  //  |    |  +------------+
  //  |    |  |  preheader |
  //  |    |  |  ...       |
  //  |    |  +------------+
  //  |    |      |
  //  |    |      |      +---+
  //  |    |      |      |   |
  //  |    |  +------------+ |
  //  |    |  |  do-while  | |
  //  |    |  |  ...       | |
  //  |    |  +------------+ |
  //  |    |      |      |   |
  //  |   +-----------+  +---+
  //  |   | loop-exit |
  //  |   |  ...      |
  //  |   +-----------+
  //  |     |
  // +-------+
  // | ...   |
  // | end   |
  // +-------+
  BasicBlock *SpecialCases = Builder.GetInsertBlock();
  SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
  BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
                                                  "udiv-end");
  BasicBlock *LoopExit  = BasicBlock::Create(Builder.getContext(),
                                             "udiv-loop-exit", F, End);
  BasicBlock *DoWhile   = BasicBlock::Create(Builder.getContext(),
                                             "udiv-do-while", F, End);
  BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
                                             "udiv-preheader", F, End);
  BasicBlock *BB1       = BasicBlock::Create(Builder.getContext(),
                                             "udiv-bb1", F, End);

  // We'll be overwriting the terminator to insert our extra blocks
  SpecialCases->getTerminator()->eraseFromParent();

  // First off, check for special cases: dividend or divisor is zero, divisor
  // is greater than dividend, and divisor is 1.
  // ; special-cases:
  // ;   %ret0_1      = icmp eq i32 %divisor, 0
  // ;   %ret0_2      = icmp eq i32 %dividend, 0
  // ;   %ret0_3      = or i1 %ret0_1, %ret0_2
  // ;   %tmp0        = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
  // ;   %tmp1        = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
  // ;   %sr          = sub nsw i32 %tmp0, %tmp1
  // ;   %ret0_4      = icmp ugt i32 %sr, 31
  // ;   %ret0        = or i1 %ret0_3, %ret0_4
  // ;   %retDividend = icmp eq i32 %sr, 31
  // ;   %retVal      = select i1 %ret0, i32 0, i32 %dividend
  // ;   %earlyRet    = or i1 %ret0, %retDividend
  // ;   br i1 %earlyRet, label %end, label %bb1
  Builder.SetInsertPoint(SpecialCases);
  Value *Ret0_1      = Builder.CreateICmpEQ(Divisor, Zero);
  Value *Ret0_2      = Builder.CreateICmpEQ(Dividend, Zero);
  Value *Ret0_3      = Builder.CreateOr(Ret0_1, Ret0_2);
  Value *Tmp0        = Builder.CreateCall2(CTLZi32, Divisor, True);
  Value *Tmp1        = Builder.CreateCall2(CTLZi32, Dividend, True);
  Value *SR          = Builder.CreateSub(Tmp0, Tmp1);
  Value *Ret0_4      = Builder.CreateICmpUGT(SR, ThirtyOne);
  Value *Ret0        = Builder.CreateOr(Ret0_3, Ret0_4);
  Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
  Value *RetVal      = Builder.CreateSelect(Ret0, Zero, Dividend);
  Value *EarlyRet    = Builder.CreateOr(Ret0, RetDividend);
  Builder.CreateCondBr(EarlyRet, End, BB1);

  // ; bb1:                                             ; preds = %special-cases
  // ;   %sr_1     = add i32 %sr, 1
  // ;   %tmp2     = sub i32 31, %sr
  // ;   %q        = shl i32 %dividend, %tmp2
  // ;   %skipLoop = icmp eq i32 %sr_1, 0
  // ;   br i1 %skipLoop, label %loop-exit, label %preheader
  Builder.SetInsertPoint(BB1);
  Value *SR_1     = Builder.CreateAdd(SR, One);
  Value *Tmp2     = Builder.CreateSub(ThirtyOne, SR);
  Value *Q        = Builder.CreateShl(Dividend, Tmp2);
  Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
  Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);

  // ; preheader:                                           ; preds = %bb1
  // ;   %tmp3 = lshr i32 %dividend, %sr_1
  // ;   %tmp4 = add i32 %divisor, -1
  // ;   br label %do-while
  Builder.SetInsertPoint(Preheader);
  Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
  Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
  Builder.CreateBr(DoWhile);

  // ; do-while:                                 ; preds = %do-while, %preheader
  // ;   %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
  // ;   %sr_3    = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
  // ;   %r_1     = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
  // ;   %q_2     = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
  // ;   %tmp5  = shl i32 %r_1, 1
  // ;   %tmp6  = lshr i32 %q_2, 31
  // ;   %tmp7  = or i32 %tmp5, %tmp6
  // ;   %tmp8  = shl i32 %q_2, 1
  // ;   %q_1   = or i32 %carry_1, %tmp8
  // ;   %tmp9  = sub i32 %tmp4, %tmp7
  // ;   %tmp10 = ashr i32 %tmp9, 31
  // ;   %carry = and i32 %tmp10, 1
  // ;   %tmp11 = and i32 %tmp10, %divisor
  // ;   %r     = sub i32 %tmp7, %tmp11
  // ;   %sr_2  = add i32 %sr_3, -1
  // ;   %tmp12 = icmp eq i32 %sr_2, 0
  // ;   br i1 %tmp12, label %loop-exit, label %do-while
  Builder.SetInsertPoint(DoWhile);
  PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
  PHINode *SR_3    = Builder.CreatePHI(I32Ty, 2);
  PHINode *R_1     = Builder.CreatePHI(I32Ty, 2);
  PHINode *Q_2     = Builder.CreatePHI(I32Ty, 2);
  Value *Tmp5  = Builder.CreateShl(R_1, One);
  Value *Tmp6  = Builder.CreateLShr(Q_2, ThirtyOne);
  Value *Tmp7  = Builder.CreateOr(Tmp5, Tmp6);
  Value *Tmp8  = Builder.CreateShl(Q_2, One);
  Value *Q_1   = Builder.CreateOr(Carry_1, Tmp8);
  Value *Tmp9  = Builder.CreateSub(Tmp4, Tmp7);
  Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
  Value *Carry = Builder.CreateAnd(Tmp10, One);
  Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
  Value *R     = Builder.CreateSub(Tmp7, Tmp11);
  Value *SR_2  = Builder.CreateAdd(SR_3, NegOne);
  Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
  Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);

  // ; loop-exit:                                      ; preds = %do-while, %bb1
  // ;   %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
  // ;   %q_3     = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
  // ;   %tmp13 = shl i32 %q_3, 1
  // ;   %q_4   = or i32 %carry_2, %tmp13
  // ;   br label %end
  Builder.SetInsertPoint(LoopExit);
  PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
  PHINode *Q_3     = Builder.CreatePHI(I32Ty, 2);
  Value *Tmp13 = Builder.CreateShl(Q_3, One);
  Value *Q_4   = Builder.CreateOr(Carry_2, Tmp13);
  Builder.CreateBr(End);

  // ; end:                                 ; preds = %loop-exit, %special-cases
  // ;   %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
  // ;   ret i32 %q_5
  Builder.SetInsertPoint(End, End->begin());
  PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);

  // Populate the Phis, since all values have now been created. Our Phis were:
  // ;   %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
  Carry_1->addIncoming(Zero, Preheader);
  Carry_1->addIncoming(Carry, DoWhile);
  // ;   %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
  SR_3->addIncoming(SR_1, Preheader);
  SR_3->addIncoming(SR_2, DoWhile);
  // ;   %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
  R_1->addIncoming(Tmp3, Preheader);
  R_1->addIncoming(R, DoWhile);
  // ;   %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
  Q_2->addIncoming(Q, Preheader);
  Q_2->addIncoming(Q_1, DoWhile);
  // ;   %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
  Carry_2->addIncoming(Zero, BB1);
  Carry_2->addIncoming(Carry, DoWhile);
  // ;   %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
  Q_3->addIncoming(Q, BB1);
  Q_3->addIncoming(Q_1, DoWhile);
  // ;   %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
  Q_5->addIncoming(Q_4, LoopExit);
  Q_5->addIncoming(RetVal, SpecialCases);

  return Q_5;
}
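The IR reads more easily next to an equivalent C++ rendering of the same shift/subtract loop (a sketch mirroring compiler-rt's __udivsi3; __builtin_clz is the GCC/Clang builtin, and the arithmetic right shift on a signed value is implementation-defined but holds on mainstream compilers):

#include <cstdint>

uint32_t udiv32(uint32_t Dividend, uint32_t Divisor) {
  if (Divisor == 0 || Dividend == 0)
    return 0;                            // the "special-cases" block
  unsigned SR = __builtin_clz(Divisor) - __builtin_clz(Dividend);
  if (SR > 31) return 0;                 // divisor is larger than dividend
  if (SR == 31) return Dividend;         // quotient is the dividend itself
  ++SR;                                  // "bb1": the loop runs SR times
  uint32_t Q = Dividend << (32 - SR);    // quotient bits accumulate here
  uint32_t R = Dividend >> SR;           // running remainder ("preheader")
  uint32_t Carry = 0;
  for (; SR; --SR) {                     // the "do-while" block
    R = (R << 1) | (Q >> 31);
    Q = (Q << 1) | Carry;
    // Branchless compare: S is all-ones if R >= Divisor, else zero.
    uint32_t S = (uint32_t)((int32_t)(Divisor - 1 - R) >> 31);
    Carry = S & 1;
    R -= Divisor & S;
  }
  return (Q << 1) | Carry;               // the "loop-exit" block
}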
Code Example #15
bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : LoopBB);

  // Make sure later instructions don't get reordered with a fence if necessary.
  Builder.SetInsertPoint(SuccessBB);
  insertTrailingFence(Builder, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(FailureBB);
  insertTrailingFence(Builder, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.

  // Setup the builder so we can create any PHIs we need.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
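For reference, the { loaded, success } pair that the expansion materializes has the same shape as what std::atomic's compare-exchange reports (a semantic analogy only, not the generated code):

#include <atomic>
#include <utility>

std::pair<int, bool> cmpxchgLike(std::atomic<int> &Addr, int Desired, int New) {
  int Expected = Desired;
  bool Success = Addr.compare_exchange_strong(Expected, New);
  // On failure, compare_exchange writes the loaded value back into Expected;
  // on success, Expected already equals the loaded value.  Either way the
  // pair matches the { iN, i1 } struct that cmpxchg returns.
  return {Expected, Success};
}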
Code Example #16
File: LoopUnswitch.cpp Project: RCSL-HKUST/heterosim
/// SimplifyCode - Okay, now that we have simplified some instructions in the
/// loop, walk over it and constant prop, dce, and fold control flow where
/// possible.  Note that this is effectively a very simple loop-structure-aware
/// optimizer.  During processing of this loop, L could very well be deleted, so
/// it must not be used.
///
/// FIXME: When the loop optimizer is more mature, separate this out to a new
/// pass.
///
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
  while (!Worklist.empty()) {
    Instruction *I = Worklist.back();
    Worklist.pop_back();

    // Simple DCE.
    if (isInstructionTriviallyDead(I)) {
      DEBUG(dbgs() << "Remove dead instruction '" << *I);
      
      // Add this instruction's operands to the worklist; they may be dead now.
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
          Worklist.push_back(Use);
      LPM->deleteSimpleAnalysisValue(I, L);
      RemoveFromWorklist(I, Worklist);
      I->eraseFromParent();
      ++NumSimplify;
      continue;
    }

    // See if instruction simplification can hack this up.  This is common for
    // things like "select false, X, Y" after unswitching made the condition be
    // 'false'.
    if (Value *V = SimplifyInstruction(I, 0, DT))
      if (LI->replacementPreservesLCSSAForm(I, V)) {
        ReplaceUsesOfWith(I, V, Worklist, L, LPM);
        continue;
      }

    // Special case hacks that appear commonly in unswitched code.
    if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
      if (BI->isUnconditional()) {
        // If BI's parent is the only pred of the successor, fold the two blocks
        // together.
        BasicBlock *Pred = BI->getParent();
        BasicBlock *Succ = BI->getSuccessor(0);
        BasicBlock *SinglePred = Succ->getSinglePredecessor();
        if (!SinglePred) continue;  // Nothing to do.
        assert(SinglePred == Pred && "CFG broken");

        DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " 
              << Succ->getName() << "\n");
        
        // Resolve any single entry PHI nodes in Succ.
        while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
        
        // If Succ has any successors with PHI nodes, update them to have
        // entries coming from Pred instead of Succ.
        Succ->replaceAllUsesWith(Pred);
        
        // Move all of the successor contents from Succ to Pred.
        Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
                                   Succ->end());
        LPM->deleteSimpleAnalysisValue(BI, L);
        BI->eraseFromParent();
        RemoveFromWorklist(BI, Worklist);
        
        // Remove Succ from the loop tree.
        LI->removeBlock(Succ);
        LPM->deleteSimpleAnalysisValue(Succ, L);
        Succ->eraseFromParent();
        ++NumSimplify;
        continue;
      }
      
      if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
        // Conditional branch.  Turn it into an unconditional branch, then
        // remove dead blocks.
        continue;  // FIXME: Enable.

        DEBUG(dbgs() << "Folded branch: " << *BI);
        BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
        BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
        DeadSucc->removePredecessor(BI->getParent(), true);
        Worklist.push_back(BranchInst::Create(LiveSucc, BI));
        LPM->deleteSimpleAnalysisValue(BI, L);
        BI->eraseFromParent();
        RemoveFromWorklist(BI, Worklist);
        ++NumSimplify;

        RemoveBlockIfDead(DeadSucc, Worklist, L);
      }
      continue;
    }
  }
}
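The "Simple DCE" step above follows a standard worklist pattern; here it is in isolation over toy types (not the LLVM classes): deleting a dead instruction can make its operands dead, so they go back on the worklist.

#include <vector>

struct Inst {
  std::vector<Inst*> Operands;
  unsigned Uses = 0;
  bool HasSideEffects = false;
};

static void worklistDCE(std::vector<Inst*> Worklist) {
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    if (I->Uses != 0 || I->HasSideEffects)
      continue;                    // still live; leave it alone
    for (Inst *Op : I->Operands)
      if (--Op->Uses == 0)
        Worklist.push_back(Op);    // operand may now be dead too
    I->Operands.clear();           // "erase" the dead instruction
  }
}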
Code Example #17
/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI(OtherBB->getTerminator());
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }

  // Nuke the old stores.
  eraseInstFromFunction(SI);
  eraseInstFromFunction(*OtherStore);
  return true;
}
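At the source level, both patterns from the header comment collapse to a merged value plus a single store in the successor (illustrative C++, unrelated to the pass's API):

void beforeIfThenElse(bool C, int *P, int V1, int V2) {
  if (C) *P = V1; else *P = V2;  // one store per predecessor block
}

void afterIfThenElse(bool C, int *P, int V1, int V2) {
  int Merged = C ? V1 : V2;      // the "storemerge" PHI
  *P = Merged;                   // single store in the successor
}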
Code Example #18
File: LoopDeletion.cpp Project: AlexZhao/freebsd
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
/// observable behavior of the program other than finite running time.  Note
/// we do ensure that this never removes a loop that might be infinite, as doing
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
  // We can only remove the loop if there is a preheader that we can
  // branch from after removing it.
  BasicBlock *preheader = L->getLoopPreheader();
  if (!preheader)
    return false;

  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->hasDedicatedExits())
    return false;

  // We can't remove loops that contain subloops.  If the subloops were dead,
  // they would already have been removed in earlier executions of this pass.
  if (L->begin() != L->end())
    return false;

  SmallVector<BasicBlock*, 4> exitingBlocks;
  L->getExitingBlocks(exitingBlocks);

  SmallVector<BasicBlock*, 4> exitBlocks;
  L->getUniqueExitBlocks(exitBlocks);

  // We require that the loop only have a single exit block.  Otherwise, we'd
  // be in the situation of needing to be able to solve statically which exit
  // block will be branched to, or trying to preserve the branching logic in
  // a loop invariant manner.
  if (exitBlocks.size() != 1)
    return false;

  // Finally, we have to check that the loop really is dead.
  bool Changed = false;
  if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
    return Changed;

  // Don't remove loops for which we can't solve the trip count.
  // They could be infinite, in which case we'd be changing program behavior.
  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
  const SCEV *S = SE.getMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(S))
    return Changed;

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  BasicBlock *exitBlock = exitBlocks[0];

  // Because we're deleting a large chunk of code at once, the sequence in which
  // we remove things is very important to avoid invalidation issues.  Don't
  // mess with this unless you have good reason and know what you're doing.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  SE.forgetLoop(L);

  // Connect the preheader directly to the exit block.
  TerminatorInst *TI = preheader->getTerminator();
  TI->replaceUsesOfWith(L->getHeader(), exitBlock);

  // Rewrite phis in the exit block to get their inputs from
  // the preheader instead of the exiting block.
  BasicBlock *exitingBlock = exitingBlocks[0];
  BasicBlock::iterator BI = exitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    int j = P->getBasicBlockIndex(exitingBlock);
    assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
    P->setIncomingBlock(j, preheader);
    for (unsigned i = 1; i < exitingBlocks.size(); ++i)
      P->removeIncomingValue(exitingBlocks[i]);
    ++BI;
  }

  // Update the dominator tree and remove the instructions and blocks that will
  // be deleted from the reference counting scheme.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  SmallVector<DomTreeNode*, 8> ChildNodes;
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI) {
    // Move all of the block's children to be children of the preheader, which
    // allows us to remove the domtree entry for the block.
    ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
    for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
         DE = ChildNodes.end(); DI != DE; ++DI) {
      DT.changeImmediateDominator(*DI, DT[preheader]);
    }

    ChildNodes.clear();
    DT.eraseNode(*LI);

    // Remove the block from the reference counting scheme, so that we can
    // delete it freely later.
    (*LI)->dropAllReferences();
  }

  // Erase the instructions and the blocks without having to worry
  // about ordering because we already dropped the references.
  // NOTE: This iteration is safe because erasing the block does not remove its
  // entry from the loop's block list.  We do that in the next section.
  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI)
    (*LI)->eraseFromParent();

  // Finally, the blocks from loopinfo.  This has to happen late because
  // otherwise our loop iterators won't work.
  LoopInfo &loopInfo = getAnalysis<LoopInfo>();
  SmallPtrSet<BasicBlock*, 8> blocks;
  blocks.insert(L->block_begin(), L->block_end());
  for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
       E = blocks.end(); I != E; ++I)
    loopInfo.removeBlock(*I);

  // The last step is to inform the loop pass manager that we've
  // eliminated this loop.
  LPM.deleteLoopFromQueue(L);
  Changed = true;

  ++NumDeleted;

  return Changed;
}
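An example of a loop this pass can delete (illustrative): the backedge-taken count is computable, the body has no side effects, and nothing defined inside is used afterwards.

int keepOnlyTheReturn(int N) {
  int Sum = 0;
  for (int I = 0; I < 100; ++I)
    Sum += I * N; // finite, side-effect free, and Sum is never read again
  (void)Sum;
  return N;       // the loop contributes nothing observable
}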
Code Example #19
    bool run()
    {
        // This enumerates the locals that we actually care about and packs them. So for example
        // if we use local 1, 3, 4, 5, 7, then we remap them: 1->0, 3->1, 4->2, 5->3, 7->4. We
        // treat a variable as being "used" if there exists an access to it (SetLocal, GetLocal,
        // Flush, PhantomLocal).
        
        BitVector usedLocals;
        
        // Collect those variables that are used from IR.
        bool hasNodesThatNeedFixup = false;
        for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
            BasicBlock* block = m_graph.block(blockIndex);
            if (!block)
                continue;
            for (unsigned nodeIndex = block->size(); nodeIndex--;) {
                Node* node = block->at(nodeIndex);
                switch (node->op()) {
                case GetLocal:
                case SetLocal:
                case Flush:
                case PhantomLocal: {
                    VariableAccessData* variable = node->variableAccessData();
                    if (variable->local().isArgument())
                        break;
                    usedLocals.set(variable->local().toLocal());
                    break;
                }
                    
                case GetLocalUnlinked: {
                    VirtualRegister operand = node->unlinkedLocal();
                    if (operand.isArgument())
                        break;
                    usedLocals.set(operand.toLocal());
                    hasNodesThatNeedFixup = true;
                    break;
                }
                    
                case LoadVarargs:
                case ForwardVarargs: {
                    LoadVarargsData* data = node->loadVarargsData();
                    if (data->count.isLocal())
                        usedLocals.set(data->count.toLocal());
                    if (data->start.isLocal()) {
                        // This part really relies on the contiguity of stack layout
                        // assignments.
                        ASSERT(VirtualRegister(data->start.offset() + data->limit - 1).isLocal());
                        for (unsigned i = data->limit; i--;) 
                            usedLocals.set(VirtualRegister(data->start.offset() + i).toLocal());
                    } // the else case shouldn't happen.
                    hasNodesThatNeedFixup = true;
                    break;
                }
                    
                case PutStack:
                case GetStack: {
                    StackAccessData* stack = node->stackAccessData();
                    if (stack->local.isArgument())
                        break;
                    usedLocals.set(stack->local.toLocal());
                    break;
                }
                    
                default:
                    break;
                }
            }
        }
        
        for (InlineCallFrameSet::iterator iter = m_graph.m_plan.inlineCallFrames->begin(); !!iter; ++iter) {
            InlineCallFrame* inlineCallFrame = *iter;
            
            if (inlineCallFrame->isVarargs()) {
                usedLocals.set(VirtualRegister(
                    JSStack::ArgumentCount + inlineCallFrame->stackOffset).toLocal());
            }
            
            for (unsigned argument = inlineCallFrame->arguments.size(); argument-- > 1;) {
                usedLocals.set(VirtualRegister(
                    virtualRegisterForArgument(argument).offset() +
                    inlineCallFrame->stackOffset).toLocal());
            }
        }
        
        Vector<unsigned> allocation(usedLocals.size());
        m_graph.m_nextMachineLocal = 0;
        for (unsigned i = 0; i < usedLocals.size(); ++i) {
            if (!usedLocals.get(i)) {
                allocation[i] = UINT_MAX;
                continue;
            }
            
            allocation[i] = m_graph.m_nextMachineLocal++;
        }
        
        for (unsigned i = m_graph.m_variableAccessData.size(); i--;) {
            VariableAccessData* variable = &m_graph.m_variableAccessData[i];
            if (!variable->isRoot())
                continue;
            
            if (variable->local().isArgument()) {
                variable->machineLocal() = variable->local();
                continue;
            }
            
            size_t local = variable->local().toLocal();
            if (local >= allocation.size())
                continue;
            
            if (allocation[local] == UINT_MAX)
                continue;
            
            variable->machineLocal() = assign(allocation, variable->local());
        }
        
        for (StackAccessData* data : m_graph.m_stackAccessData) {
            if (!data->local.isLocal()) {
                data->machineLocal = data->local;
                continue;
            }
            
            if (static_cast<size_t>(data->local.toLocal()) >= allocation.size())
                continue;
            if (allocation[data->local.toLocal()] == UINT_MAX)
                continue;
            
            data->machineLocal = assign(allocation, data->local);
        }
        
        // This register is never valid for DFG code blocks.
        codeBlock()->setActivationRegister(VirtualRegister());
        if (LIKELY(!m_graph.hasDebuggerEnabled()))
            codeBlock()->setScopeRegister(VirtualRegister());
        else
            codeBlock()->setScopeRegister(assign(allocation, codeBlock()->scopeRegister()));

        for (unsigned i = m_graph.m_inlineVariableData.size(); i--;) {
            InlineVariableData data = m_graph.m_inlineVariableData[i];
            InlineCallFrame* inlineCallFrame = data.inlineCallFrame;
            
            if (inlineCallFrame->isVarargs()) {
                inlineCallFrame->argumentCountRegister = assign(
                    allocation, VirtualRegister(inlineCallFrame->stackOffset + JSStack::ArgumentCount));
            }
            
            for (unsigned argument = inlineCallFrame->arguments.size(); argument-- > 1;) {
                ArgumentPosition& position = m_graph.m_argumentPositions[
                    data.argumentPositionStart + argument];
                VariableAccessData* variable = position.someVariable();
                ValueSource source;
                if (!variable)
                    source = ValueSource(SourceIsDead);
                else {
                    source = ValueSource::forFlushFormat(
                        variable->machineLocal(), variable->flushFormat());
                }
                inlineCallFrame->arguments[argument] = source.valueRecovery();
            }
            
            RELEASE_ASSERT(inlineCallFrame->isClosureCall == !!data.calleeVariable);
            if (inlineCallFrame->isClosureCall) {
                VariableAccessData* variable = data.calleeVariable->find();
                ValueSource source = ValueSource::forFlushFormat(
                    variable->machineLocal(),
                    variable->flushFormat());
                inlineCallFrame->calleeRecovery = source.valueRecovery();
            } else
                RELEASE_ASSERT(inlineCallFrame->calleeRecovery.isConstant());
        }
        
        // Fix GetLocalUnlinked's variable references.
        if (hasNodesThatNeedFixup) {
            for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
                BasicBlock* block = m_graph.block(blockIndex);
                if (!block)
                    continue;
                for (unsigned nodeIndex = block->size(); nodeIndex--;) {
                    Node* node = block->at(nodeIndex);
                    switch (node->op()) {
                    case GetLocalUnlinked: {
                        node->setUnlinkedMachineLocal(assign(allocation, node->unlinkedLocal()));
                        break;
                    }
                        
                    case LoadVarargs:
                    case ForwardVarargs: {
                        LoadVarargsData* data = node->loadVarargsData();
                        data->machineCount = assign(allocation, data->count);
                        data->machineStart = assign(allocation, data->start);
                        break;
                    }
                        
                    default:
                        break;
                    }
                }
            }
        }
        
        return true;
    }
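The packing itself, pulled out of the surrounding bookkeeping, is just a dense renumbering of the used indices (a self-contained sketch matching the 1, 3, 4, 5, 7 -> 0, 1, 2, 3, 4 example in the opening comment):

#include <climits>
#include <cstddef>
#include <vector>

std::vector<unsigned> packLocals(const std::vector<bool> &UsedLocals) {
  std::vector<unsigned> Allocation(UsedLocals.size(), UINT_MAX);
  unsigned NextMachineLocal = 0;
  for (size_t I = 0; I < UsedLocals.size(); ++I)
    if (UsedLocals[I])
      Allocation[I] = NextMachineLocal++; // unused slots stay UINT_MAX
  return Allocation;
}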
Code Example #20
File: DFGValidate.cpp Project: 3163504123/phantomjs
    void validate()
    {
        // NB. This code is not written for performance, since it is not intended to run
        // in release builds.
        
        // Validate that all local variables at the head of the root block are dead.
        BasicBlock* root = m_graph.m_blocks[0].get();
        for (unsigned i = 0; i < root->variablesAtHead.numberOfLocals(); ++i)
            V_EQUAL((static_cast<VirtualRegister>(i), 0), static_cast<Node*>(0), root->variablesAtHead.local(i));
        
        // Validate ref counts and uses.
        HashMap<Node*, unsigned> myRefCounts;
        for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) {
            BasicBlock* block = m_graph.m_blocks[blockIndex].get();
            if (!block || !block->isReachable)
                continue;
            for (size_t i = 0; i < block->numNodes(); ++i)
                myRefCounts.add(block->node(i), 0);
        }
        HashSet<Node*> acceptableNodes;
        for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) {
            BasicBlock* block = m_graph.m_blocks[blockIndex].get();
            if (!block || !block->isReachable)
                continue;
            for (size_t i = 0; i < block->numNodes(); ++i) {
                Node* node = block->node(i);
                acceptableNodes.add(node);
                if (!node->shouldGenerate())
                    continue;
                for (unsigned j = 0; j < m_graph.numChildren(node); ++j) {
                    // Phi children in LoadStore form are invalid.
                    if (m_graph.m_form == LoadStore && block->isPhiIndex(i))
                        continue;
                    
                    Edge edge = m_graph.child(node, j);
                    if (!edge)
                        continue;
                    
                    myRefCounts.find(edge.node())->value++;
                    
                    // Unless I'm a Flush, Phantom, GetLocal, or Phi, my children should hasResult().
                    switch (node->op()) {
                    case Flush:
                    case GetLocal:
                        VALIDATE((node, edge), edge->hasVariableAccessData());
                        VALIDATE((node, edge), edge->variableAccessData() == node->variableAccessData());
                        break;
                    case PhantomLocal:
                        VALIDATE((node, edge), edge->hasVariableAccessData());
                        VALIDATE((node, edge), edge->variableAccessData() == node->variableAccessData());
                        VALIDATE((node, edge), edge->op() != SetLocal);
                        break;
                    case Phi:
                        VALIDATE((node, edge), edge->hasVariableAccessData());
                        if (m_graph.m_unificationState == LocallyUnified)
                            break;
                        VALIDATE((node, edge), edge->variableAccessData() == node->variableAccessData());
                        break;
                    case Phantom:
                        switch (m_graph.m_form) {
                        case LoadStore:
                            if (j) {
                                VALIDATE((node, edge), edge->hasResult());
                                break;
                            }
                            switch (edge->op()) {
                            case Phi:
                            case SetArgument:
                            case SetLocal:
                                break;
                            default:
                                VALIDATE((node, edge), edge->hasResult());
                                break;
                            }
                            break;
                        case ThreadedCPS:
                            VALIDATE((node, edge), edge->hasResult());
                            break;
                        }
                        break;
                    default:
                        VALIDATE((node, edge), edge->hasResult());
                        break;
                    }
                }
            }
        }
        for (BlockIndex blockIndex = 0; blockIndex < m_graph.m_blocks.size(); ++blockIndex) {
            BasicBlock* block = m_graph.m_blocks[blockIndex].get();
            if (!block || !block->isReachable)
                continue;
            
            HashSet<Node*> phisInThisBlock;
            HashSet<Node*> nodesInThisBlock;
            
            for (size_t i = 0; i < block->numNodes(); ++i) {
                Node* node = block->node(i);
                nodesInThisBlock.add(node);
                if (block->isPhiIndex(i))
                    phisInThisBlock.add(node);
                if (m_graph.m_refCountState == ExactRefCount)
                    V_EQUAL((node), myRefCounts.get(node), node->adjustedRefCount());
                else
                    V_EQUAL((node), node->refCount(), 1);
                for (unsigned j = 0; j < m_graph.numChildren(node); ++j) {
                    Edge edge = m_graph.child(node, j);
                    if (!edge)
                        continue;
                    VALIDATE((node, edge), acceptableNodes.contains(edge.node()));
                }
            }
            
            for (size_t i = 0; i < block->phis.size(); ++i) {
                Node* node = block->phis[i];
                ASSERT(phisInThisBlock.contains(node));
                VALIDATE((node), node->op() == Phi);
                VirtualRegister local = node->local();
                for (unsigned j = 0; j < m_graph.numChildren(node); ++j) {
                    // Phi children in LoadStore form are invalid.
                    if (m_graph.m_form == LoadStore && block->isPhiIndex(i))
                        continue;
                    
                    Edge edge = m_graph.child(node, j);
                    if (!edge)
                        continue;
                    
                    VALIDATE(
                        (node, edge),
                        edge->op() == SetLocal
                        || edge->op() == SetArgument
                        || edge->op() == Flush
                        || edge->op() == Phi
                        || edge->op() == ZombieHint
                        || edge->op() == MovHint
                        || edge->op() == MovHintAndCheck);
                    
                    if (phisInThisBlock.contains(edge.node()))
                        continue;
                    
                    if (nodesInThisBlock.contains(edge.node())) {
                        VALIDATE(
                            (node, edge),
                            edge->op() == SetLocal
                            || edge->op() == ZombieHint
                            || edge->op() == MovHint
                            || edge->op() == MovHintAndCheck
                            || edge->op() == SetArgument
                            || edge->op() == Flush);
                        
                        continue;
                    }
                    
                    // There must exist a predecessor block that has this node index in
                    // its tail variables.
                    bool found = false;
                    for (unsigned k = 0; k < block->m_predecessors.size(); ++k) {
                        BasicBlock* prevBlock = m_graph.m_blocks[block->m_predecessors[k]].get();
                        VALIDATE((Block, block->m_predecessors[k]), prevBlock);
                        VALIDATE((Block, block->m_predecessors[k]), prevBlock->isReachable);
                        Node* prevNode = prevBlock->variablesAtTail.operand(local);
                        // If we have a Phi that is not referring to *this* block then all predecessors
                        // must have that local available.
                        VALIDATE((local, blockIndex, Block, block->m_predecessors[k]), prevNode);
                        switch (prevNode->op()) {
                        case GetLocal:
                        case Flush:
                        case PhantomLocal:
                            prevNode = prevNode->child1().node();
                            break;
                        default:
                            break;
                        }
                        if (node->shouldGenerate()) {
                            VALIDATE((local, block->m_predecessors[k], prevNode),
                                     prevNode->shouldGenerate());
                        }
                        VALIDATE(
                            (local, block->m_predecessors[k], prevNode),
                            prevNode->op() == SetLocal
                            || prevNode->op() == MovHint
                            || prevNode->op() == MovHintAndCheck
                            || prevNode->op() == ZombieHint
                            || prevNode->op() == SetArgument
                            || prevNode->op() == Phi);
                        if (prevNode == edge.node()) {
                            found = true;
                            break;
                        }
                        // At this point it cannot refer into this block.
                        VALIDATE((local, block->m_predecessors[k], prevNode), !prevBlock->isInBlock(edge.node()));
                    }
                    
                    VALIDATE((node, edge), found);
                }
            }
            
            Operands<size_t> getLocalPositions(
                block->variablesAtHead.numberOfArguments(),
                block->variablesAtHead.numberOfLocals());
            Operands<size_t> setLocalPositions(
                block->variablesAtHead.numberOfArguments(),
                block->variablesAtHead.numberOfLocals());
            
            for (size_t i = 0; i < block->variablesAtHead.numberOfArguments(); ++i) {
                VALIDATE((static_cast<VirtualRegister>(argumentToOperand(i)), blockIndex), !block->variablesAtHead.argument(i) || block->variablesAtHead.argument(i)->hasVariableAccessData());
                if (m_graph.m_form == ThreadedCPS)
                    VALIDATE((static_cast<VirtualRegister>(argumentToOperand(i)), blockIndex), !block->variablesAtTail.argument(i) || block->variablesAtTail.argument(i)->hasVariableAccessData());
                
                getLocalPositions.argument(i) = notSet;
                setLocalPositions.argument(i) = notSet;
            }
            for (size_t i = 0; i < block->variablesAtHead.numberOfLocals(); ++i) {
                VALIDATE((static_cast<VirtualRegister>(i), blockIndex), !block->variablesAtHead.local(i) || block->variablesAtHead.local(i)->hasVariableAccessData());
                if (m_graph.m_form == ThreadedCPS)
                    VALIDATE((static_cast<VirtualRegister>(i), blockIndex), !block->variablesAtTail.local(i) || block->variablesAtTail.local(i)->hasVariableAccessData());

                getLocalPositions.local(i) = notSet;
                setLocalPositions.local(i) = notSet;
            }
            
            for (size_t i = 0; i < block->size(); ++i) {
                Node* node = block->at(i);
                ASSERT(nodesInThisBlock.contains(node));
                VALIDATE((node), node->op() != Phi);
                for (unsigned j = 0; j < m_graph.numChildren(node); ++j) {
                    Edge edge = m_graph.child(node, j);
                    if (!edge)
                        continue;
                    VALIDATE((node, edge), nodesInThisBlock.contains(edge.node()));
                    switch (node->op()) {
                    case PhantomLocal:
                    case GetLocal:
                    case Flush:
                        break;
                    case Phantom:
                        if (m_graph.m_form == LoadStore && !j)
                            break;
                    default:
                        VALIDATE((node, edge), !phisInThisBlock.contains(edge.node()));
                        break;
                    }
                }
                
                if (!node->shouldGenerate())
                    continue;
                switch (node->op()) {
                case GetLocal:
                    if (node->variableAccessData()->isCaptured())
                        break;
                    // Ignore GetLocal's that we know to be dead, but that the graph
                    // doesn't yet know to be dead.
                    if (!myRefCounts.get(node))
                        break;
                    if (m_graph.m_form == ThreadedCPS)
                        VALIDATE((node, blockIndex), getLocalPositions.operand(node->local()) == notSet);
                    getLocalPositions.operand(node->local()) = i;
                    break;
                case SetLocal:
                    if (node->variableAccessData()->isCaptured())
                        break;
                    // Only record the first SetLocal. There may be multiple SetLocals
                    // because of flushing.
                    if (setLocalPositions.operand(node->local()) != notSet)
                        break;
                    setLocalPositions.operand(node->local()) = i;
                    break;
                default:
                    break;
                }
            }
            
            if (m_graph.m_form == LoadStore)
                continue;
            
            for (size_t i = 0; i < block->variablesAtHead.numberOfArguments(); ++i) {
                checkOperand(
                    blockIndex, getLocalPositions, setLocalPositions, argumentToOperand(i));
            }
            for (size_t i = 0; i < block->variablesAtHead.numberOfLocals(); ++i) {
                checkOperand(
                    blockIndex, getLocalPositions, setLocalPositions, i);
            }
        }
    }
Code Example #21
/// Insert code in the prolog when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is the total number
/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
/// the switch instruction is generated.
///
///        extraiters = tripcount % loopfactor
///        if (extraiters == 0) jump Loop:
///        else jump Prol
/// Prol:  LoopBody;
///        extraiters -= 1                 // Omitted if unroll factor is 2.
///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
///        if (tripcount < loopfactor) jump End
/// Loop:
/// ...
/// End:
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
                                   bool AllowExpensiveTripCount, LoopInfo *LI,
                                   LPPassManager *LPM) {
  // for now, only unroll loops that contain a single exit
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and that there is a unique
  // exit block.
  if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
    return false;

  // Use Scalar Evolution to compute the trip count.  This allows more
  // loops to be unrolled than relying on induction var simplification.
  if (!LPM)
    return false;
  auto *SEWP = LPM->getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
  if (!SEWP)
    return false;
  ScalarEvolution &SE = SEWP->getSE();

  // Only unroll loops with a computable trip count, and the trip count
  // needs to be an integer value (allowing a pointer type is a TODO item).
  const SCEV *BECountSC = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy())
    return false;

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration
  const SCEV *TripCountSC =
      SE.getAddExpr(BECountSC, SE.getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  BasicBlock *Header = L->getHeader();
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
    return false;

  // We only handle cases when the unroll factor is a power of 2.
  // Count is the loop unroll factor, the number of extra copies added + 1.
  if (!isPowerOf2_32(Count))
    return false;

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth)
    return false;

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE.forgetLoop(ParentLoop);

  // Grab analyses that we preserve.
  auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

  BasicBlock *PH = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  // It helps to split the original preheader twice: once for the end of the
  // prolog code and once for a new loop preheader.
  BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor (Count)
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);

  IRBuilder<> B(PreHeaderBR);
  Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

  // If ModVal is zero, we know that either
  //  1. there are no iterations to be run in the prologue loop
  // OR
  //  2. the addition computing TripCount overflowed
  //
  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
  // number of iterations that remain to be run in the original loop is a
  // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
  // explicitly check this above).

  Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");

  // Branch to either the extra iterations or the cloned/unrolled loop
  // We will fix up the true branch label when adding loop body copies
  B.CreateCondBr(BranchVal, PEnd, PEnd);
  assert(PreHeaderBR->isUnconditional() &&
         PreHeaderBR->getSuccessor(0) == PEnd &&
         "CFG edges in Preheader are not correct");
  PreHeaderBR->eraseFromParent();
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  bool UnrollPrologue = Count == 2;

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
                  VMap, LI);

  // Insert the cloned blocks into function just before the original loop
  F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(), F->end());

  // Rewrite the cloned instruction operands to use the values
  // created when the clone is created.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
                              E = NewBlocks[i]->end();
         I != E; ++I) {
      RemapInstruction(&*I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
    }
  }

  // Connect the prolog code to the original loop and update the
  // PHI functions.
  BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
  ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
                LPM->getAsPass());
  NumRuntimeUnrolled++;
  return true;
}
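
Since the listing above is dense, here is a minimal standalone sketch of the control flow the transform emits, assuming an unroll factor of Count == 4. The function and its name (sumFirstN) are invented for illustration: the prolog runs tripCount % Count iterations, computed with a mask because Count is a power of two, and the remaining iterations, now a multiple of Count, run through the unrolled body.

#include <cassert>
#include <cstdint>

// Illustrative only: sum 0..tripCount-1 the way a runtime-unrolled loop
// with Count == 4 would execute it.
uint64_t sumFirstN(uint64_t tripCount) {
  const uint64_t Count = 4;                     // power-of-two unroll factor
  uint64_t xtraiter = tripCount & (Count - 1);  // ModVal: tripCount % Count
  uint64_t sum = 0, i = 0;
  for (uint64_t k = 0; k < xtraiter; ++k)       // Prol: leftover iterations
    sum += i++;
  for (; i < tripCount; i += Count) {           // Loop: unrolled bodies
    sum += i;
    sum += i + 1;
    sum += i + 2;
    sum += i + 3;
  }
  return sum;                                   // End:
}

int main() {
  for (uint64_t n = 0; n < 100; ++n)
    assert(sumFirstN(n) == n * (n - 1) / 2);
  return 0;
}
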
Code Example #22
File: LCSSA.cpp  Project: C0deZLee/IntFlow
/// ProcessInstruction - Given an instruction in the loop, check to see if it
/// has any uses that are outside the current loop.  If so, insert LCSSA PHI
/// nodes and rewrite the uses.
bool LCSSA::ProcessInstruction(Instruction *Inst,
                               const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
  SmallVector<Use*, 16> UsesToRewrite;
  
  BasicBlock *InstBB = Inst->getParent();
  
  for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    BasicBlock *UserBB = cast<Instruction>(U)->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(U))
      UserBB = PN->getIncomingBlock(UI);
    
    if (InstBB != UserBB && !inLoop(UserBB))
      UsesToRewrite.push_back(&UI.getUse());
  }

  // If there are no uses outside the loop, exit with no change.
  if (UsesToRewrite.empty()) return false;
  
  ++NumLCSSA; // We are applying the transformation

  // Invoke instructions are special in that their result value is not available
  // along their unwind edge. The code below tests to see whether DomBB dominates
  // the value, so adjust DomBB to the normal destination block, which is
  // effectively where the value is first usable.
  BasicBlock *DomBB = Inst->getParent();
  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
    DomBB = Inv->getNormalDest();

  DomTreeNode *DomNode = DT->getNode(DomBB);

  SmallVector<PHINode*, 16> AddedPHIs;

  SSAUpdater SSAUpdate;
  SSAUpdate.Initialize(Inst->getType(), Inst->getName());
  
  // Insert the LCSSA PHIs into all of the exit blocks dominated by the
  // value, and add them to the PHI map.
  for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
      BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
    BasicBlock *ExitBB = *BBI;
    if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
    
    // If we already inserted something for this BB, don't reprocess it.
    if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
    
    PHINode *PN = PHINode::Create(Inst->getType(),
                                  PredCache.GetNumPreds(ExitBB),
                                  Inst->getName()+".lcssa",
                                  ExitBB->begin());

    // Add inputs from inside the loop for this PHI.
    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
      PN->addIncoming(Inst, *PI);

      // If the exit block has a predecessor not within the loop, arrange for
      // the incoming value use corresponding to that predecessor to be
      // rewritten in terms of a different LCSSA PHI.
      if (!inLoop(*PI))
        UsesToRewrite.push_back(
          &PN->getOperandUse(
            PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
    }

    AddedPHIs.push_back(PN);
    
    // Remember that this phi makes the value alive in this block.
    SSAUpdate.AddAvailableValue(ExitBB, PN);
  }

  // Rewrite all uses outside the loop in terms of the new PHIs we just
  // inserted.
  for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
    // If this use is in an exit block, rewrite to use the newly inserted PHI.
    // This is required for correctness because SSAUpdate doesn't handle uses in
    // the same block.  It assumes the PHI we inserted is at the end of the
    // block.
    Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User))
      UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);

    if (isa<PHINode>(UserBB->begin()) &&
        isExitBlock(UserBB, ExitBlocks)) {
      // Tell the VHs that the uses changed. This updates SCEV's caches.
      if (UsesToRewrite[i]->get()->hasValueHandle())
        ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
      UsesToRewrite[i]->set(UserBB->begin());
      continue;
    }
    
    // Otherwise, do full PHI insertion.
    SSAUpdate.RewriteUse(*UsesToRewrite[i]);
  }

  // Remove PHI nodes that did not have any uses rewritten.
  for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
    if (AddedPHIs[i]->use_empty())
      AddedPHIs[i]->eraseFromParent();
  }
  
  return true;
}
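
At the source level, the effect of the pass can be pictured with a small invented example: the loop-defined value is funneled through a single exit-block copy, the analogue of the inserted ".lcssa" PHI, so every use outside the loop sees the copy instead of the in-loop definition.

#include <cassert>

// Illustrative only: v_lcssa plays the role of the ".lcssa" PHI that
// ProcessInstruction creates in the exit block.
int lastSquareBelow(int limit) {
  int v = 0;
  for (int i = 0; i * i < limit; ++i)
    v = i * i;                 // value defined inside the loop
  int v_lcssa = v;             // the exit-block copy (the LCSSA PHI)
  return v_lcssa;              // out-of-loop uses rewritten to v_lcssa
}

int main() {
  assert(lastSquareBelow(10) == 9);
  return 0;
}
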
Code Example #23
File: LinkList.cpp  Project: songlh/LDoctor
bool isLinkListLoop(Loop * pLoop, map<PHINode *, set<Value *> > & LinkListHeaderMapping )
{
	BasicBlock * pHeader = pLoop->getHeader();

	vector<PHINode *> vecToDo;

	for(BasicBlock::iterator II = pHeader->begin(); II != pHeader->end(); II ++ )
	{
		if(isa<PHINode>(II))
		{
			if(PointerType * pPointerType = dyn_cast<PointerType>(II->getType()))
			{
				if(isa<StructType>(pPointerType->getElementType()))
				{
					vecToDo.push_back(cast<PHINode>(II));
				}
			}
		}
		else
		{
			break;
		}
	}


	if(vecToDo.size() == 0)
	{
		return false;
	}

	vector<PHINode *>::iterator itVecPhiBegin = vecToDo.begin();
	vector<PHINode *>::iterator itVecPhiEnd   = vecToDo.end();


	for(; itVecPhiBegin != itVecPhiEnd; itVecPhiBegin ++ )
	{
		PHINode * pPHI = *itVecPhiBegin;

		bool bFlag = true;

		for(unsigned i = 0; i < pPHI->getNumIncomingValues(); i ++ )
		{
			if(pLoop->contains(pPHI->getIncomingBlock(i)))
			{
				if(Instruction * pInst = dyn_cast<Instruction>(pPHI->getIncomingValue(i)))
				{
					if(!isReachableThroughLinkListDereference(pPHI, pInst, pLoop))
					{
						bFlag = false;
						break;
					}
				}
				else
				{
					bFlag = false;
					break;
				}
			}
		}

		if(bFlag)
		{
			for(unsigned i = 0; i < pPHI->getNumIncomingValues(); i ++ )
			{
				if(!pLoop->contains(pPHI->getIncomingBlock(i)))
				{
					LinkListHeaderMapping[pPHI].insert(pPHI->getIncomingValue(i));
				}
			}
		}
	}


	if(LinkListHeaderMapping.size() > 0)
	{
		return true;
	}
	else
	{
		return false;
	}
}
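
The shape being detected is the ordinary pointer-chasing loop. A hypothetical C++ counterpart (all names invented): the loop-carried pointer plays the role of the header PHI, its in-loop incoming value is reached by dereferencing the previous one (p->next), and its incoming value from outside the loop (head) is what would land in LinkListHeaderMapping.

#include <cassert>

struct Node { int val; Node *next; };

// p corresponds to the header PHINode over a pointer-to-struct type.
int sumList(const Node *head) {
  int sum = 0;
  for (const Node *p = head; p; p = p->next)  // next p reached by dereference
    sum += p->val;
  return sum;
}

int main() {
  Node c{3, nullptr}, b{2, &c}, a{1, &b};
  assert(sumList(&a) == 6);
  return 0;
}
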
Code Example #24
File: InlineFunction.cpp  Project: berr/llvm
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();
  
  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get the personality function from the callee if it contains a landing pad.
  Value *CalleePersonality = 0;
  for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
       I != E; ++I)
    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
      const BasicBlock *BB = II->getUnwindDest();
      const LandingPadInst *LP = BB->getLandingPadInst();
      CalleePersonality = LP->getPersonalityFn();
      break;
    }

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  if (CalleePersonality) {
    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
         I != E; ++I)
      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
        const BasicBlock *BB = II->getUnwindDest();
        const LandingPadInst *LP = BB->getLandingPadInst();

        // If the personality functions match, then we can perform the
        // inlining. Otherwise, we can't inline.
        // TODO: This isn't 100% true. Some personality functions are proper
        //       supersets of others and can be used in place of the other.
        if (LP->getPersonalityFn() != CalleePersonality)
          return false;

        break;
      }
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CS.isByValArgument(ArgNo)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));
 
        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;
      
      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;
      
      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);
      
      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static allocas, scoping them to the
  // function we just inlined.
  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      // Try to determine the size of the allocation.
      ConstantInt *AllocaSize = 0;
      if (ConstantInt *AIArraySize =
          dyn_cast<ConstantInt>(AI->getArraySize())) {
        if (IFI.TD) {
          Type *AllocaType = AI->getAllocatedType();
          uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
          uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
          assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
          // Check that array size doesn't saturate uint64_t and doesn't
          // overflow when it's multiplied by type size.
          if (AllocaArraySize != ~0ULL &&
              UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
            AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                          AllocaArraySize * AllocaTypeSize);
          }
        }
      }

      builder.CreateLifetimeStart(AI, AllocaSize);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI, AllocaSize);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
      BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = NULL;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  return true;
}
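
The "one level of inlining" caveat from the header comment, as a tiny invented example: after inlining b into a, the call to c that lived inside b now appears in a and would need its own inlining step.

#include <cassert>

int c(int x) { return x * 2; }

int b(int x) { return c(x) + 1; }      // callee: contains a call to c

int a_before() { return b(2); }        // call site to be inlined

// After one level of inlining, b's body replaces the call, with the
// actual argument 2 substituted for the formal x; the call to c remains.
int a_after() { return c(2) + 1; }

int main() {
  assert(a_before() == a_after());
  return 0;
}
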
Code Example #25
File: UnifyFunctionExitNodes.cpp  Project: CPFL/guc
// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
// BasicBlock, and converting all returns to unconditional branches to this
// new basic block.  The singular exit node is stored in ReturnBlock.
//
// If there are no return stmts in the Function, ReturnBlock is set to null.
//
bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
  // Loop over all of the blocks in a function, tracking all of the blocks that
  // return.
  //
  std::vector<BasicBlock*> ReturningBlocks;
  std::vector<BasicBlock*> UnwindingBlocks;
  std::vector<BasicBlock*> UnreachableBlocks;
  for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
    if (isa<ReturnInst>(I->getTerminator()))
      ReturningBlocks.push_back(I);
    else if (isa<UnwindInst>(I->getTerminator()))
      UnwindingBlocks.push_back(I);
    else if (isa<UnreachableInst>(I->getTerminator()))
      UnreachableBlocks.push_back(I);

  // Handle unwinding blocks first.
  if (UnwindingBlocks.empty()) {
    UnwindBlock = 0;
  } else if (UnwindingBlocks.size() == 1) {
    UnwindBlock = UnwindingBlocks.front();
  } else {
    UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
    new UnwindInst(F.getContext(), UnwindBlock);

    for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
           E = UnwindingBlocks.end(); I != E; ++I) {
      BasicBlock *BB = *I;
      BB->getInstList().pop_back();  // Remove the unwind insn
      BranchInst::Create(UnwindBlock, BB);
    }
  }

  // Then unreachable blocks.
  if (UnreachableBlocks.empty()) {
    UnreachableBlock = 0;
  } else if (UnreachableBlocks.size() == 1) {
    UnreachableBlock = UnreachableBlocks.front();
  } else {
    UnreachableBlock = BasicBlock::Create(F.getContext(), 
                                          "UnifiedUnreachableBlock", &F);
    new UnreachableInst(F.getContext(), UnreachableBlock);

    for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
           E = UnreachableBlocks.end(); I != E; ++I) {
      BasicBlock *BB = *I;
      BB->getInstList().pop_back();  // Remove the unreachable inst.
      BranchInst::Create(UnreachableBlock, BB);
    }
  }

  // Now handle return blocks.
  if (ReturningBlocks.empty()) {
    ReturnBlock = 0;
    return false;                          // No blocks return
  } else if (ReturningBlocks.size() == 1) {
    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
    return false;
  }

  // Otherwise, we need to insert a new basic block into the function, add a PHI
  // node (if the function returns a value), and convert all of the return
  // instructions into unconditional branches.
  //
  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
                                               "UnifiedReturnBlock", &F);

  PHINode *PN = 0;
  if (F.getReturnType()->isVoidTy()) {
    ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
  } else {
    // If the function doesn't return void... add a PHI node to the block...
    PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
    NewRetBlock->getInstList().push_back(PN);
    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
  }

  // Loop over all of the blocks, replacing the return instruction with an
  // unconditional branch.
  //
  for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
         E = ReturningBlocks.end(); I != E; ++I) {
    BasicBlock *BB = *I;

    // Add an incoming element to the PHI node for every return instruction that
    // is merging into this new block...
    if (PN)
      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);

    BB->getInstList().pop_back();  // Remove the return insn
    BranchInst::Create(NewRetBlock, BB);
  }
  ReturnBlock = NewRetBlock;
  return true;
}
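
A source-level sketch of the unification, with invented names: each return becomes a branch to one unified block, and the PHI node ("UnifiedRetVal") becomes a single variable merged from the returning paths.

#include <cassert>

int classify_before(int x) {
  if (x < 0) return -1;
  return 1;
}

// After unification: one return; ret plays the role of the
// "UnifiedRetVal" PHI in the "UnifiedReturnBlock".
int classify_after(int x) {
  int ret;
  if (x < 0) {
    ret = -1;               // was: return -1
    goto UnifiedReturnBlock;
  }
  ret = 1;                  // was: return 1
UnifiedReturnBlock:
  return ret;
}

int main() {
  assert(classify_before(-5) == classify_after(-5));
  assert(classify_before(5) == classify_after(5));
  return 0;
}
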
Code Example #26
File: convert.cpp  Project: sploving/safecode-mirror
//
// Method: InsertFreesAtEnd()
//
// Description:
//  Insert free instructions so that the memory allocated by the specified
//  malloc instruction is freed on function exit.
//
void
ConvertUnsafeAllocas::InsertFreesAtEnd(Instruction *MI) {
  assert (MI && "MI is NULL!\n");

  //
  // Get the dominance frontier information about the malloc instruction's
  // basic block.  We cache the information in case we end up processing
  // multiple instructions from the same function.
  //
  BasicBlock *currentBlock = MI->getParent();
  Function * F = currentBlock->getParent();
  DominanceFrontier * dfmt = &getAnalysis<DominanceFrontier>(*F);
  DominatorTree     * domTree = &getAnalysis<DominatorTree>(*F);

  DominanceFrontier::const_iterator it = dfmt->find(currentBlock);

#if 0
  //
  // If the basic block has a dominance frontier, use it.
  //
  if (it != dfmt->end()) {
    const DominanceFrontier::DomSetType &S = it->second;
    if (S.size() > 0) {
      DominanceFrontier::DomSetType::iterator pCurrent = S.begin(),
                                                  pEnd = S.end();
      for (; pCurrent != pEnd; ++pCurrent) {
        BasicBlock *frontierBlock = *pCurrent;
        // One of its predecessors is dominated by currentBlock;
        // need to insert a free in that predecessor
        for (pred_iterator SI = pred_begin(frontierBlock),
                           SE = pred_end(frontierBlock);
                           SI != SE; ++SI) {
          BasicBlock *predecessorBlock = *SI;
          if (domTree->dominates (predecessorBlock, currentBlock)) {
            // Get the terminator
            Instruction *InsertPt = predecessorBlock->getTerminator();
            new FreeInst(MI, InsertPt);
          } 
        }
      }

      return;
    }
  }
#endif

  //
  // There is no dominance frontier; insert frees on all returns.
  //
  std::vector<Instruction*> FreePoints;
  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
    if (isa<ReturnInst>(BB->getTerminator()) ||
        isa<ResumeInst>(BB->getTerminator()))
      FreePoints.push_back(BB->getTerminator());

  //
  // We have the Free points; now we construct the free instructions at each
  // of the points.
  //
  std::vector<Instruction*>::iterator fpI = FreePoints.begin(),
                                      fpE = FreePoints.end();
  for (; fpI != fpE ; ++ fpI) {
    //
    // Determine whether the allocation dominates the return.  If not, then
    // don't insert a free instruction for now.
    //
    Instruction *InsertPt = *fpI;
    if (domTree->dominates (MI->getParent(), InsertPt->getParent())) {
      CallInst::Create (kfree, MI, "", InsertPt);
    } else {
      ++MissingFrees;
    }
  }
}
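
What the inserted frees look like at the source level, as a rough sketch with the project's kfree replaced by the standard free and all names invented (error checking elided for brevity): a free goes immediately before every return that the allocation dominates.

#include <cassert>
#include <cstdlib>

// Illustrative only: both returns are dominated by the allocation,
// so a free is inserted before each of them.
int process(int n) {
  int *buf = static_cast<int *>(malloc(16 * sizeof(int)));  // the "malloc instruction"
  if (n < 0) {
    free(buf);              // inserted before this dominated early return
    return 0;
  }
  for (int i = 0; i < 16; ++i)
    buf[i] = n + i;
  int r = buf[0] + buf[15];
  free(buf);                // and before the normal return
  return r;
}

int main() {
  assert(process(1) == 17);
  assert(process(-1) == 0);
  return 0;
}
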
Code Example #27
File: SjLjEHPrepare.cpp  Project: CTSRD-CHERI/llvm
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
                                           ArrayRef<InvokeInst *> Invokes) {
  // Finally, scan the code looking for instructions with bad live ranges.
  for (BasicBlock &BB : F) {
    for (Instruction &Inst : BB) {
      // Ignore obvious cases we don't have to handle. In particular, most
      // instructions either have no uses or only have a single use inside the
      // current block. Ignore them quickly.
      if (Inst.use_empty())
        continue;
      if (Inst.hasOneUse() &&
          cast<Instruction>(Inst.user_back())->getParent() == &BB &&
          !isa<PHINode>(Inst.user_back()))
        continue;

      // If this is an alloca in the entry block, it's not a real register
      // value.
      if (auto *AI = dyn_cast<AllocaInst>(&Inst))
        if (AI->isStaticAlloca())
          continue;

      // Avoid iterator invalidation by copying users to a temporary vector.
      SmallVector<Instruction *, 16> Users;
      for (User *U : Inst.users()) {
        Instruction *UI = cast<Instruction>(U);
        if (UI->getParent() != &BB || isa<PHINode>(UI))
          Users.push_back(UI);
      }

      // Find all of the blocks that this value is live in.
      SmallPtrSet<BasicBlock *, 32> LiveBBs;
      LiveBBs.insert(&BB);
      while (!Users.empty()) {
        Instruction *U = Users.pop_back_val();

        if (!isa<PHINode>(U)) {
          MarkBlocksLiveIn(U->getParent(), LiveBBs);
        } else {
          // Uses for a PHI node occur in their predecessor block.
          PHINode *PN = cast<PHINode>(U);
          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
            if (PN->getIncomingValue(i) == &Inst)
              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
        }
      }

      // Now that we know all of the blocks that this thing is live in, see if
      // it includes any of the unwind locations.
      bool NeedsSpill = false;
      for (InvokeInst *Invoke : Invokes) {
        BasicBlock *UnwindBlock = Invoke->getUnwindDest();
        if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) {
          LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around "
                            << UnwindBlock->getName() << "\n");
          NeedsSpill = true;
          break;
        }
      }

      // If we decided we need a spill, do it.
      // FIXME: Spilling this way is overkill, as it forces all uses of
      // the value to be reloaded from the stack slot, even those that aren't
      // in the unwind blocks. We should be more selective.
      if (NeedsSpill) {
        DemoteRegToStack(Inst, true);
        ++NumSpilled;
      }
    }
  }

  // Go through the landing pads and remove any PHIs there.
  for (InvokeInst *Invoke : Invokes) {
    BasicBlock *UnwindBlock = Invoke->getUnwindDest();
    LandingPadInst *LPI = UnwindBlock->getLandingPadInst();

    // Place PHIs into a set to avoid invalidating the iterator.
    SmallPtrSet<PHINode *, 8> PHIsToDemote;
    for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
      PHIsToDemote.insert(cast<PHINode>(PN));
    if (PHIsToDemote.empty())
      continue;

    // Demote the PHIs to the stack.
    for (PHINode *PN : PHIsToDemote)
      DemotePHIToStack(PN);

    // Move the landingpad instruction back to the top of the landing pad block.
    LPI->moveBefore(&UnwindBlock->front());
  }
}
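
The spilling requirement has a familiar source-level cousin; this is a sketch of the analogy, not the pass itself: a C local modified between setjmp and longjmp must be volatile - in effect forced into a stack slot - to be readable on the landing side, which is the role DemoteRegToStack plays for values live across the unwind edge.

#include <cassert>
#include <csetjmp>

static jmp_buf Env;

int run() {
  volatile int progress = 0;  // "spilled": must survive the non-local jump
  if (setjmp(Env) == 0) {
    progress = 1;             // modified after setjmp...
    longjmp(Env, 1);          // ...then we take the "unwind edge"
  }
  return progress;            // read on the landing side
}

int main() {
  assert(run() == 1);
  return 0;
}
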
Code Example #28
bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order = AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB =  BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
      Builder, Addr, MemOpOrder);

  Value *NewVal;
  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
    NewVal = AI->getValOperand();
    break;
  case AtomicRMWInst::Add:
    NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Sub:
    NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::And:
    NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Nand:
    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
                               "new");
    break;
  case AtomicRMWInst::Or:
    NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Xor:
    NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  default:
    llvm_unreachable("Unknown atomic op");
  }

  Value *StoreSuccess =
      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
          Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  insertTrailingFence(Builder, Order);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();

  return true;
}
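
The same retry structure can be written portably; a sketch using std::atomic's compare_exchange_weak in place of the target's load-linked/store-conditional hooks, shown for the Max operation (names invented): the failed conditional store is what drives the try_again branch.

#include <atomic>
#include <cassert>

// Expansion of "atomicrmw max" as a CAS retry loop: load, compute the
// new value, attempt the conditional store, and loop on failure.
// compare_exchange_weak refreshes Loaded with the current value when it fails.
int atomicMax(std::atomic<int> &A, int Incr) {
  int Loaded = A.load(std::memory_order_relaxed);
  int NewVal;
  do {
    NewVal = Loaded > Incr ? Loaded : Incr;   // some_op
  } while (!A.compare_exchange_weak(Loaded, NewVal,
                                    std::memory_order_seq_cst,
                                    std::memory_order_relaxed));
  return Loaded;                              // old value, like atomicrmw
}

int main() {
  std::atomic<int> A(3);
  assert(atomicMax(A, 7) == 3);
  assert(A.load() == 7);
  return 0;
}
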
Code Example #29
bool LowerEmSetjmp::runOnModule(Module &M) {
  TheModule = &M;

  Function *Setjmp = TheModule->getFunction("setjmp");
  Function *Longjmp = TheModule->getFunction("longjmp");
  if (!Setjmp && !Longjmp) return false;

  Type *i32 = Type::getInt32Ty(M.getContext());
  Type *Void = Type::getVoidTy(M.getContext());

  // Add functions

  Function *EmSetjmp = NULL;

  if (Setjmp) {
    SmallVector<Type*, 2> EmSetjmpTypes;
    EmSetjmpTypes.push_back(Setjmp->getFunctionType()->getParamType(0));
    EmSetjmpTypes.push_back(i32); // extra param that says which setjmp in the function it is
    FunctionType *EmSetjmpFunc = FunctionType::get(i32, EmSetjmpTypes, false);
    EmSetjmp = Function::Create(EmSetjmpFunc, GlobalValue::ExternalLinkage, "emscripten_setjmp", TheModule);
  }

  Function *EmLongjmp = Longjmp ? Function::Create(Longjmp->getFunctionType(), GlobalValue::ExternalLinkage, "emscripten_longjmp", TheModule) : NULL;

  SmallVector<Type*, 1> IntArgTypes;
  IntArgTypes.push_back(i32);
  FunctionType *IntIntFunc = FunctionType::get(i32, IntArgTypes, false);

  Function *CheckLongjmp = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_check_longjmp", TheModule); // gets control flow

  Function *GetLongjmpResult = Function::Create(IntIntFunc, GlobalValue::ExternalLinkage, "emscripten_get_longjmp_result", TheModule); // gets int value longjmp'd

  FunctionType *VoidFunc = FunctionType::get(Void, false);
  Function *PrepSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_prep_setjmp", TheModule);

  Function *CleanupSetjmp = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_cleanup_setjmp", TheModule);

  Function *PreInvoke = TheModule->getFunction("emscripten_preinvoke");
  if (!PreInvoke) PreInvoke = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule);

  FunctionType *IntFunc = FunctionType::get(i32, false);
  Function *PostInvoke = TheModule->getFunction("emscripten_postinvoke");
  if (!PostInvoke) PostInvoke = Function::Create(IntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule);

  // Process all callers of setjmp and longjmp. Start with setjmp.

  typedef std::vector<PHINode*> Phis;
  typedef std::map<Function*, Phis> FunctionPhisMap;
  FunctionPhisMap SetjmpOutputPhis;
  std::vector<Instruction*> ToErase;

  if (Setjmp) {
    for (Instruction::user_iterator UI = Setjmp->user_begin(), UE = Setjmp->user_end(); UI != UE; ++UI) {
      User *U = *UI;
      if (CallInst *CI = dyn_cast<CallInst>(U)) {
        BasicBlock *SJBB = CI->getParent();
        // The tail is everything right after the call, and will be reached once
        // when setjmp is first called, and again when a longjmp returns to the setjmp
        BasicBlock *Tail = SplitBlock(SJBB, CI->getNextNode());
        // Add a phi to the tail, which will be the output of setjmp, indicating
        // whether this is the first call or a longjmp back. The phi directly uses the right
        // value based on where we arrive from
        PHINode *SetjmpOutput = PHINode::Create(i32, 2, "", Tail->getFirstNonPHI());
        SetjmpOutput->addIncoming(ConstantInt::get(i32, 0), SJBB); // setjmp initial call returns 0
        CI->replaceAllUsesWith(SetjmpOutput); // The proper output is now this, not the setjmp call itself
        // longjmps returning to the setjmp will add themselves to this phi
        Phis& P = SetjmpOutputPhis[SJBB->getParent()];
        P.push_back(SetjmpOutput);
        // fix call target
        SmallVector<Value *, 2> Args;
        Args.push_back(CI->getArgOperand(0));
        Args.push_back(ConstantInt::get(i32, P.size())); // our index in the function is our place in the array + 1
        CallInst::Create(EmSetjmp, Args, "", CI);
        ToErase.push_back(CI);
      } else {
        errs() << **UI << "\n";
        report_fatal_error("bad use of setjmp, should only call it");
      }
    }
  }

  // Update longjmp. FIXME: we could avoid throwing in longjmp as an optimization when longjmping back into the current function perhaps?

  if (Longjmp) Longjmp->replaceAllUsesWith(EmLongjmp);

  // Update all setjmping functions

  for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) {
    Function *F = I->first;
    Phis& P = I->second;

    CallInst::Create(PrepSetjmp, "", F->begin()->begin());

    // Update each call that can longjmp so it can return to a setjmp where relevant

    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) {
      BasicBlock *BB = BBI++;
      for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) {
        Instruction *I = Iter++;
        CallInst *CI;
        if ((CI = dyn_cast<CallInst>(I))) {
          Value *V = CI->getCalledValue();
          if (V == PrepSetjmp || V == EmSetjmp || V == CheckLongjmp || V == GetLongjmpResult || V == PreInvoke || V == PostInvoke) continue;
          if (Function *CF = dyn_cast<Function>(V)) if (CF->isIntrinsic()) continue;
          // TODO: proper analysis of what can actually longjmp. Currently we assume anything but setjmp can.
          // This may longjmp, so we need to check if it did. Split at that point, and
          // envelop the call in pre/post invoke, if we need to
          CallInst *After;
          Instruction *Check = NULL;
          if (Iter != E && (After = dyn_cast<CallInst>(Iter)) && After->getCalledValue() == PostInvoke) {
            // use the pre|postinvoke that exceptions lowering already made
            Check = Iter++;
          }
          BasicBlock *Tail = SplitBlock(BB, Iter); // Iter already points to the next instruction, as we need
          TerminatorInst *TI = BB->getTerminator();
          if (!Check) {
            // no existing pre|postinvoke, create our own
            CallInst::Create(PreInvoke, "", CI);
            Check = CallInst::Create(PostInvoke, "", TI); // goes at the end of the block, right after CI

            // If we are calling a function that is noreturn, we must remove that attribute.
            // The code we insert here expects the call to return, after we catch the exception.
            if (CI->doesNotReturn()) {
              if (Function *F = dyn_cast<Function>(CI->getCalledValue())) {
                F->removeFnAttr(Attribute::NoReturn);
              }
              CI->setAttributes(CI->getAttributes().removeAttribute(TheModule->getContext(), AttributeSet::FunctionIndex, Attribute::NoReturn));
              assert(!CI->doesNotReturn());
            }
          }

          // We need to replace BB's terminator: SplitBlock makes BB branch straight to
          // Tail, but we must check whether a longjmp occurred and, if so, go to the
          // right setjmp-tail
          SmallVector<Value *, 1> Args;
          Args.push_back(Check);
          Instruction *LongjmpCheck = CallInst::Create(CheckLongjmp, Args, "", BB);
          Instruction *LongjmpResult = CallInst::Create(GetLongjmpResult, Args, "", BB);
          SwitchInst *SI = SwitchInst::Create(LongjmpCheck, Tail, 2, BB);
          // -1 means no longjmp happened; continue normally (will hit the default switch
          // case). 0 means a longjmp that is not ours to handle and needs a rethrow.
          // Otherwise, the value is the index into P plus 1 (offset to avoid 0).
          for (unsigned i = 0; i < P.size(); i++) {
            SI->addCase(cast<ConstantInt>(ConstantInt::get(i32, i+1)), P[i]->getParent());
            P[i]->addIncoming(LongjmpResult, BB);
          }
          ToErase.push_back(TI); // new terminator is now the switch

          // We split the block here, and any remaining calls in it now live in Tail,
          // so continue the traversal there
          BB = Tail;
          Iter = BB->begin();
          E = BB->end();
        } else if (InvokeInst *CI = dyn_cast<InvokeInst>(I)) { // XXX check if target is setjmp
          (void)CI;
          report_fatal_error("TODO: invoke inside setjmping functions");
        }
      }
    }

    // add a cleanup before each return
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ) {
      BasicBlock *BB = BBI++;
      TerminatorInst *TI = BB->getTerminator();
      if (isa<ReturnInst>(TI)) {
        CallInst::Create(CleanupSetjmp, "", TI);
      }
    }
  }

  for (unsigned i = 0; i < ToErase.size(); i++) {
    ToErase[i]->eraseFromParent();
  }

  // Finally, our modifications to the cfg can break dominance of SSA variables. For example,
  //   if (x()) { .. setjmp() .. }
  //   if (y()) { .. longjmp() .. }
  // We must split the longjmp block, and it can jump into the setjmp one. But that means that when
  // we split the setjmp block, its first part no longer dominates its second part - there is
  // a theoretically possible control flow path where x() is false, then y() is true and we
  // reach the second part of the setjmp block, without ever reaching the first part. So,
  // we recalculate regs vs. mem.
  for (FunctionPhisMap::iterator I = SetjmpOutputPhis.begin(); I != SetjmpOutputPhis.end(); I++) {
    Function *F = I->first;
    doRegToMem(*F);
    doMemToReg(*F);
  }

  return true;
}
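As context for the pass above (this snippet is an added illustration, not part of the original example): the lowering must preserve the standard setjmp/longjmp contract, where setjmp returns 0 on the direct call and the nonzero value passed to longjmp when control comes back. That second "return" is exactly what the phi nodes inserted above model. A minimal, self-contained sketch of that contract:

// Minimal standalone illustration (hypothetical, added for context) of the
// contract the pass's phi nodes model: setjmp yields 0 on the first return
// and the longjmp value on the second.
#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;

static void MayLongjmp() {
  std::longjmp(Buf, 42); // re-enters the setjmp "tail" with the value 42
}

int main() {
  int Ret = setjmp(Buf); // the pass's phi carries exactly this value
  if (Ret == 0) {
    std::printf("direct return from setjmp\n");
    MayLongjmp();
  } else {
    std::printf("returned via longjmp, value = %d\n", Ret);
  }
  return 0;
}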
Code Example #30
/// isSafeToPromoteArgument - As you might guess from the name of this method,
/// it checks to see if it is both safe and useful to promote the argument.
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
  typedef std::set<IndicesVector> GEPIndicesSet;

  // Quick exit for unused arguments
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of the uses are loads, or are GEP
  // instructions (with constant indices) that are subsequently loaded.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but they would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
  //
  // This set will contain all sets of indices that are loaded in the entry
  // block, and thus are safe to unconditionally load in the caller.
  GEPIndicesSet SafeToUnconditionallyLoad;

  // This set contains all the sets of indices that we are planning to promote.
  // This makes it possible to limit the number of arguments added.
  GEPIndicesSet ToPromote;

  // If the pointer is always valid, any load with first index 0 is valid.
  if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));

  // First, iterate the entry block and mark loads of (geps of) arguments as
  // safe.
  BasicBlock *EntryBlock = Arg->getParent()->begin();
  // Declare this here so we can reuse it
  IndicesVector Indices;
  for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
       I != E; ++I)
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      Value *V = LI->getPointerOperand();
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
        V = GEP->getPointerOperand();
        if (V == Arg) {
          // This load actually loads (part of) Arg? Check the indices then.
          Indices.reserve(GEP->getNumIndices());
          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
               II != IE; ++II)
            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
              Indices.push_back(CI->getSExtValue());
            else
              // We found a non-constant GEP index for this argument? Bail out
              // right away, can't promote this argument at all.
              return false;

          // Indices checked out, mark them as safe
          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
          Indices.clear();
        }
      } else if (V == Arg) {
        // Direct loads are equivalent to a GEP with a single 0 index.
        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
      }
    }
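  // For example, "load (getelementptr Arg, 0, 1)" appearing in the entry block
  // records the index set {0, 1} as unconditionally safe to load in the caller.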

  // Now, iterate all uses of the argument to see if there are any uses that are
  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
  SmallVector<LoadInst*, 16> Loads;
  IndicesVector Operands;
  for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    Operands.clear();
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Don't hack volatile/atomic loads
      if (!LI->isSimple()) return false;
      Loads.push_back(LI);
      // Direct loads are equivalent to a GEP with a zero index and then a load.
      Operands.push_back(0);
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      if (GEP->use_empty()) {
        // Dead GEPs cause trouble later.  Just remove them if we run into
        // them.
        getAnalysis<AliasAnalysis>().deleteValue(GEP);
        GEP->eraseFromParent();
        // TODO: This runs the above loop over and over again for dead GEPs.
        // Couldn't we just increment the UI iterator earlier and erase the use?
        return isSafeToPromoteArgument(Arg, isByVal);
      }

      // Ensure that all of the indices are constants.
      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
        i != e; ++i)
        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
          Operands.push_back(C->getSExtValue());
        else
          return false;  // Not a constant operand GEP!

      // Ensure that the only users of the GEP are load instructions.
      for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
           UI != E; ++UI)
        if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
          // Don't hack volatile/atomic loads
          if (!LI->isSimple()) return false;
          Loads.push_back(LI);
        } else {
          // Other uses than load?
          return false;
        }
    } else {
      return false;  // Not a load or a GEP.
    }

    // Now, see if it is safe to promote this load / loads of this GEP. Loading
    // is safe if Operands, or a prefix of Operands, is marked as safe.
    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
      return false;

    // See if we are already promoting a load with these indices; if so, there is
    // nothing to do. If not, check to make sure that we aren't promoting too
    // many elements.
    if (ToPromote.find(Operands) == ToPromote.end()) {
      if (maxElements > 0 && ToPromote.size() == maxElements) {
        DEBUG(dbgs() << "argpromotion not promoting argument '"
              << Arg->getName() << "' because it would require adding more "
              << "than " << maxElements << " arguments to the function.\n");
        // We limit aggregate promotion to only promoting up to a fixed number
        // of elements of the aggregate.
        return false;
      }
      ToPromote.insert(Operands);
    }
  }

  if (Loads.empty()) return true;  // No users, this is a dead argument.

  // Okay, now we know that the argument is only used by load instructions and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
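  // (idf_ext_iterator uses this set as its visited set, so a block proven
  // transparent while checking one load is not re-scanned for later loads.)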
  SmallPtrSet<BasicBlock*, 16> TranspBlocks;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    LoadInst *Load = Loads[i];
    BasicBlock *BB = Load->getParent();

    AliasAnalysis::Location Loc = AA.getLocation(Load);
    if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
      return false;  // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;
      for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
             I = idf_ext_begin(P, TranspBlocks),
             E = idf_ext_end(P, TranspBlocks); I != E; ++I)
        if (AA.canBasicBlockModify(**I, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}
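To make the payoff of this safety check concrete, here is a hedged, source-level sketch (hypothetical code added for illustration, not part of ArgPromotion itself) of the rewrite that a true result enables: a pointer argument whose elements are only loaded is replaced by the loaded scalars, and the loads move to the call sites.

// Hypothetical before/after sketch of the transformation this check guards.
struct Pair { int A; int B; };

// before: the callee only loads through the pointer argument
static int SumBefore(const Pair *P) { return P->A + P->B; }

// after: the scalars are passed directly; the loads happen in the caller
static int SumAfter(int A, int B) { return A + B; }

int Caller(const Pair &P) {
  // The caller now performs the loads unconditionally - which is exactly why
  // isSafeToPromoteArgument must prove those loads cannot trap or change meaning.
  return SumAfter(P.A, P.B); // was: SumBefore(&P)
}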