示例#1
0
// We generate a loop of either of the following structures:
//
//              BeforeBB                      BeforeBB
//                 |                             |
//                 v                             v
//              GuardBB                      PreHeaderBB
//              /      |                         |   _____
//     __  PreHeaderBB  |                        v  \/    |
//    /  \    /         |                     HeaderBB  latch
// latch  HeaderBB      |                        |\       |
//    \  /    \         /                        | \------/
//     <       \       /                         |
//              \     /                          v
//              ExitBB                         ExitBB
//
// depending on whether or not we know that it is executed at least once. If
// not, GuardBB checks if the loop is executed at least once. If this is the
// case we branch to PreHeaderBB and subsequently to the HeaderBB, which
// contains the loop iv 'polly.indvar', the incremented loop iv
// 'polly.indvar_next' as well as the condition to check if we execute another
// iteration of the loop. After the loop has finished, we branch to ExitBB.
Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
                         PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
                         DominatorTree &DT, BasicBlock *&ExitBB,
                         ICmpInst::Predicate Predicate,
                         LoopAnnotator *Annotator, bool Parallel,
                         bool UseGuard) {
  Function *F = Builder.GetInsertBlock()->getParent();
  LLVMContext &Context = F->getContext();

  assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
  IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
  assert(LoopIVType && "UB is not integer?");

  BasicBlock *BeforeBB = Builder.GetInsertBlock();
  BasicBlock *GuardBB =
      UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
  BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
  BasicBlock *PreHeaderBB =
      BasicBlock::Create(Context, "polly.loop_preheader", F);

  if (Annotator) {
    Annotator->Begin(HeaderBB);
    if (Parallel)
      Annotator->SetCurrentParallel();
  }

  // Update LoopInfo
  Loop *OuterLoop = LI.getLoopFor(BeforeBB);
  Loop *NewLoop = new Loop();

  if (OuterLoop)
    OuterLoop->addChildLoop(NewLoop);
  else
    LI.addTopLevelLoop(NewLoop);

  if (OuterLoop && GuardBB)
    OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase());
  else if (OuterLoop)
    OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase());

  NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase());

  // ExitBB
  ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P);
  ExitBB->setName("polly.loop_exit");

  // BeforeBB
  if (GuardBB) {
    BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
    DT.addNewBlock(GuardBB, BeforeBB);

    // GuardBB
    Builder.SetInsertPoint(GuardBB);
    Value *LoopGuard;
    LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
    LoopGuard->setName("polly.loop_guard");
    Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
    DT.addNewBlock(PreHeaderBB, GuardBB);
  } else {
    BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
    DT.addNewBlock(PreHeaderBB, BeforeBB);
  }

  // PreHeaderBB
  Builder.SetInsertPoint(PreHeaderBB);
  Builder.CreateBr(HeaderBB);

  // HeaderBB
  DT.addNewBlock(HeaderBB, PreHeaderBB);
  Builder.SetInsertPoint(HeaderBB);
  PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
  IV->addIncoming(LB, PreHeaderBB);
  Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
  Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
  Value *LoopCondition;
  UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
  LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
  LoopCondition->setName("polly.loop_cond");
  Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
  IV->addIncoming(IncrementedIV, HeaderBB);
  if (GuardBB)
    DT.changeImmediateDominator(ExitBB, GuardBB);
  else
    DT.changeImmediateDominator(ExitBB, BeforeBB);

  // The loop body should be added here.
  Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
  return IV;
}
示例#2
0
void WinEHPrepare::removeImplausibleInstructions(Function &F) {
  // Remove implausible terminators and replace them with UnreachableInst.
  for (auto &Funclet : FuncletBlocks) {
    BasicBlock *FuncletPadBB = Funclet.first;
    std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
    Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
    auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
    auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
    auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);

    for (BasicBlock *BB : BlocksInFunclet) {
      for (Instruction &I : *BB) {
        CallSite CS(&I);
        if (!CS)
          continue;

        Value *FuncletBundleOperand = nullptr;
        if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
          FuncletBundleOperand = BU->Inputs.front();

        if (FuncletBundleOperand == FuncletPad)
          continue;

        // Skip call sites which are nounwind intrinsics or inline asm.
        auto *CalledFn =
            dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
        if (CalledFn && ((CalledFn->isIntrinsic() && CS.doesNotThrow()) ||
                         CS.isInlineAsm()))
          continue;

        // This call site was not part of this funclet, remove it.
        if (CS.isInvoke()) {
          // Remove the unwind edge if it was an invoke.
          removeUnwindEdge(BB);
          // Get a pointer to the new call.
          BasicBlock::iterator CallI =
              std::prev(BB->getTerminator()->getIterator());
          auto *CI = cast<CallInst>(&*CallI);
          changeToUnreachable(CI, /*UseLLVMTrap=*/false);
        } else {
          changeToUnreachable(&I, /*UseLLVMTrap=*/false);
        }

        // There are no more instructions in the block (except for unreachable),
        // we are done.
        break;
      }

      Instruction *TI = BB->getTerminator();
      // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
      bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
      // The token consumed by a CatchReturnInst must match the funclet token.
      bool IsUnreachableCatchret = false;
      if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
        IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
      // The token consumed by a CleanupReturnInst must match the funclet token.
      bool IsUnreachableCleanupret = false;
      if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
        IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
      if (IsUnreachableRet || IsUnreachableCatchret ||
          IsUnreachableCleanupret) {
        changeToUnreachable(TI, /*UseLLVMTrap=*/false);
      } else if (isa<InvokeInst>(TI)) {
        if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
          // Invokes within a cleanuppad for the MSVC++ personality never
          // transfer control to their unwind edge: the personality will
          // terminate the program.
          removeUnwindEdge(BB);
        }
      }
    }
  }
}
示例#3
0
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;

  BasicBlock *ReturnBlock = nullptr;
  BasicBlock *NonReturnBlock = nullptr;
  unsigned ReturnCount = 0;
  for (BasicBlock *BB : successors(EntryBlock)) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      ReturnBlock = BB;
      ReturnCount++;
    } else
      NonReturnBlock = BB;
  }

  if (ReturnCount != 1)
    return nullptr;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function *DuplicateFunction = CloneFunction(F, VMap);
  DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(DuplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = NewReturnBlock;
  NewReturnBlock = NewReturnBlock->splitBasicBlock(
      NewReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &NewReturnBlock->front();
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = NewReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
                        NewEntryBlock);
    OldPhi->removeIncomingValue(NewEntryBlock);

    ++I;
  }
  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(NewNonReturnBlock);
  for (BasicBlock &BB : *DuplicateFunction)
    if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
        &BB != NewNonReturnBlock)
      ToExtract.push_back(&BB);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*DuplicateFunction);

  // Extract the body of the if.
  Function *ExtractedFunction =
      CodeExtractor(ToExtract, &DT).extractCodeRegion();

  // Inline the top-level if test into all callers.
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  for (User *User : Users)
    if (CallInst *CI = dyn_cast<CallInst>(User))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  DuplicateFunction->replaceAllUsesWith(F);
  DuplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return ExtractedFunction;
}
示例#4
0
void DSWP::cleanup(Loop *L, LPPassManager &LPM) {
	// Move some instructions that may not have been inserted in the right
	// place, delete the old loop, and clean up our aux data structures for this
	// loop.

	/*
	 * move the produce instructions, which have been inserted after the branch,
	 * in front of it
	 */
	for (int i = 0; i < MAX_THREAD; i++) {
		for (Function::iterator bi = allFunc[i]->begin(),
								be = allFunc[i]->end();
				bi != be; ++bi) {
			BasicBlock *bb = bi;
			TerminatorInst *term = NULL;
			for (BasicBlock::iterator ii = bb->begin(), ie = bb->end();
					ii != ie; ++ii) {
				Instruction *inst = ii;
				if (isa<TerminatorInst>(inst)) {
					term = dyn_cast<TerminatorInst>(inst);
					break;
				}
			}

			if (term == NULL) {
				error("term cannot be null");
			}

			while (true) {
				Instruction *last = &bb->getInstList().back();
				if (isa<TerminatorInst>(last))
					break;
				last->moveBefore(term);
			}
		}
	}

	/*
     * move the phi nodes to the top of the block
	 */
	for (int i = 0; i < MAX_THREAD; i++) {
		for (Function::iterator bi = allFunc[i]->begin(),
								be = allFunc[i]->end();
				bi != be; ++bi) {
			BasicBlock *bb = bi;
			Instruction *first_nonphi = bb->getFirstNonPHI();

			BasicBlock::iterator ii = bb->begin(), ie = bb->end();
			// advance the iterator up to one past first_nonphi
			while (&(*ii) != first_nonphi) { ++ii; }
			++ii;

			// move any phi nodes after the first nonphi to before it
			for (BasicBlock::iterator i_next; ii != ie; ii = i_next) {
				i_next = ii;
				++i_next;

				Instruction *inst = ii;
				if (isa<PHINode>(inst)) {
					inst->moveBefore(first_nonphi);
				}
			}
		}
	}

	cout << "begin to delete loop" << endl;
	for (Loop::block_iterator bi = L->block_begin(), be = L->block_end();
			bi != be; ++bi) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ii = BB->begin(), i_next, ie = BB->end();
				ii != ie; ii = i_next) {
			i_next = ii;
			++i_next;
			Instruction &inst = *ii;
			inst.replaceAllUsesWith(UndefValue::get(inst.getType()));
			inst.eraseFromParent();
		}
	}

	// Delete the basic blocks only afterwards
	// so that backwards branch instructions don't break
	for (Loop::block_iterator bi = L->block_begin(), be = L->block_end();
			bi != be; ++bi) {
		BasicBlock *BB = *bi;
		BB->eraseFromParent();
	}

	LPM.deleteLoopFromQueue(L);

}
示例#5
0
void WinEHPrepare::cloneCommonBlocks(Function &F) {
  // We need to clone all blocks which belong to multiple funclets.  Values are
  // remapped throughout the funclet to propagate both the new instructions
  // *and* the new basic blocks themselves.
  for (auto &Funclets : FuncletBlocks) {
    BasicBlock *FuncletPadBB = Funclets.first;
    std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
    Value *FuncletToken;
    if (FuncletPadBB == &F.getEntryBlock())
      FuncletToken = ConstantTokenNone::get(F.getContext());
    else
      FuncletToken = FuncletPadBB->getFirstNonPHI();

    std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
    ValueToValueMapTy VMap;
    for (BasicBlock *BB : BlocksInFunclet) {
      ColorVector &ColorsForBB = BlockColors[BB];
      // We don't need to do anything if the block is monochromatic.
      size_t NumColorsForBB = ColorsForBB.size();
      if (NumColorsForBB == 1)
        continue;

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << "  Cloning block \'" << BB->getName()
                              << "\' for funclet \'" << FuncletPadBB->getName()
                              << "\'.\n");

      // Create a new basic block and copy instructions into it!
      BasicBlock *CBB =
          CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
      // Insert the clone immediately after the original to ensure determinism
      // and to keep the same relative ordering of any funclet's blocks.
      CBB->insertInto(&F, BB->getNextNode());

      // Add basic block mapping.
      VMap[BB] = CBB;

      // Record delta operations that we need to perform to our color mappings.
      Orig2Clone.emplace_back(BB, CBB);
    }

    // If nothing was cloned, we're done cloning in this funclet.
    if (Orig2Clone.empty())
      continue;

    // Update our color mappings to reflect that one block has lost a color and
    // another has gained a color.
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;

      BlocksInFunclet.push_back(NewBlock);
      ColorVector &NewColors = BlockColors[NewBlock];
      assert(NewColors.empty() && "A new block should only have one color!");
      NewColors.push_back(FuncletPadBB);

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << "  Assigned color \'" << FuncletPadBB->getName()
                              << "\' to block \'" << NewBlock->getName()
                              << "\'.\n");

      BlocksInFunclet.erase(
          std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
          BlocksInFunclet.end());
      ColorVector &OldColors = BlockColors[OldBlock];
      OldColors.erase(
          std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
          OldColors.end());

      DEBUG_WITH_TYPE("winehprepare-coloring",
                      dbgs() << "  Removed color \'" << FuncletPadBB->getName()
                              << "\' from block \'" << OldBlock->getName()
                              << "\'.\n");
    }

    // Loop over all of the instructions in this funclet, fixing up operand
    // references as we go.  This uses VMap to do all the hard work.
    for (BasicBlock *BB : BlocksInFunclet)
      // Loop over all instructions, fixing each one as we find it...
      for (Instruction &I : *BB)
        RemapInstruction(&I, VMap,
                         RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

    // Catchrets targeting cloned blocks need to be updated separately from
    // the loop above because they are not in the current funclet.
    SmallVector<CatchReturnInst *, 2> FixupCatchrets;
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;

      FixupCatchrets.clear();
      for (BasicBlock *Pred : predecessors(OldBlock))
        if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
          if (CatchRet->getCatchSwitchParentPad() == FuncletToken)
            FixupCatchrets.push_back(CatchRet);

      for (CatchReturnInst *CatchRet : FixupCatchrets)
        CatchRet->setSuccessor(NewBlock);
    }

    auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
      unsigned NumPreds = PN->getNumIncomingValues();
      for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
           ++PredIdx) {
        BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
        bool EdgeTargetsFunclet;
        if (auto *CRI =
                dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
          EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken);
        } else {
          ColorVector &IncomingColors = BlockColors[IncomingBlock];
          assert(!IncomingColors.empty() && "Block not colored!");
          assert((IncomingColors.size() == 1 ||
                  llvm::all_of(IncomingColors,
                               [&](BasicBlock *Color) {
                                 return Color != FuncletPadBB;
                               })) &&
                 "Cloning should leave this funclet's blocks monochromatic");
          EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
        }
        if (IsForOldBlock != EdgeTargetsFunclet)
          continue;
        PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
        // Revisit the next entry.
        --PredIdx;
        --PredEnd;
      }
    };

    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;
      for (PHINode &OldPN : OldBlock->phis()) {
        UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
      }
      for (PHINode &NewPN : NewBlock->phis()) {
        UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
      }
    }

    // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
    // the PHI nodes for NewBB now.
    for (auto &BBMapping : Orig2Clone) {
      BasicBlock *OldBlock = BBMapping.first;
      BasicBlock *NewBlock = BBMapping.second;
      for (BasicBlock *SuccBB : successors(NewBlock)) {
        for (PHINode &SuccPN : SuccBB->phis()) {
          // Ok, we have a PHI node.  Figure out what the incoming value was for
          // the OldBlock.
          int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
          if (OldBlockIdx == -1)
            break;
          Value *IV = SuccPN.getIncomingValue(OldBlockIdx);

          // Remap the value if necessary.
          if (auto *Inst = dyn_cast<Instruction>(IV)) {
            ValueToValueMapTy::iterator I = VMap.find(Inst);
            if (I != VMap.end())
              IV = I->second;
          }

          SuccPN.addIncoming(IV, NewBlock);
        }
      }
    }

    for (ValueToValueMapTy::value_type VT : VMap) {
      // If there were values defined in BB that are used outside the funclet,
      // then we now have to update all uses of the value to use either the
      // original value, the cloned value, or some PHI derived value.  This can
      // require arbitrary PHI insertion, of which we are prepared to do, clean
      // these up now.
      SmallVector<Use *, 16> UsesToRename;

      auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
      if (!OldI)
        continue;
      auto *NewI = cast<Instruction>(VT.second);
      // Scan all uses of this instruction to see if it is used outside of its
      // funclet, and if so, record them in UsesToRename.
      for (Use &U : OldI->uses()) {
        Instruction *UserI = cast<Instruction>(U.getUser());
        BasicBlock *UserBB = UserI->getParent();
        ColorVector &ColorsForUserBB = BlockColors[UserBB];
        assert(!ColorsForUserBB.empty());
        if (ColorsForUserBB.size() > 1 ||
            *ColorsForUserBB.begin() != FuncletPadBB)
          UsesToRename.push_back(&U);
      }

      // If there are no uses outside the block, we're done with this
      // instruction.
      if (UsesToRename.empty())
        continue;

      // We found a use of OldI outside of the funclet.  Rename all uses of OldI
      // that are outside its funclet to be uses of the appropriate PHI node
      // etc.
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);

      while (!UsesToRename.empty())
        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
    }
  }
}
示例#6
0
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // This is an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}
示例#7
0
bool ProfilingPass::runOnFunction(Function &F)
{
	LLVMContext& context = F.getContext();
    Module *m = F.getParent();
	string funcname = F.getNameStr();
	string injectfunc("injectFault");
	if((profileoption == 'p' || profileoption == 'c' || profileoption == 'i') && (funcname.find(injectfunc) != string::npos))
		return false;
	std::vector<Instruction*> insert_worklist;
	for (inst_iterator In = inst_begin(F), E = inst_end(F); In != E; ++In)
    {     
		Instruction *I = dyn_cast<Instruction>(&*In);
		//errs()<<*I<<"\n";
		if(profileoption == 'b')
		{
			if(CmpInst *ci = dyn_cast<CmpInst>(I))
				if(is_used_by_branch(ci)){
					vector<const Type*> argTypes(1);
					argTypes[0] = Type::getInt32Ty(context);	// enum for the options
					FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 );
					Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType);			// get the injection function
					vector<Value*> countArgs(1);
					const IntegerType* itype = IntegerType::get(context,32);
					Value* branchVal = ConstantInt::get(itype, BRANCH );

					countArgs[0] = branchVal; //enum for branch
					CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", I); 
				}
		}
		else if(profileoption == 'd')
		{
			//see if the current instruction is a cmp instruction that leads to a conditional branch
			//add the instrumentation to the defs of this cmp instruction
			//--> Static time deduction since branch not known if executed or not
			CmpInst *ci = dyn_cast<CmpInst>(I);
			//errs() <<"reached here:\n";
			if(!ci)
				continue;
	
			//traverse def-use chain
			//int is_used_by_branch = 0;    
	
			if(!is_used_by_branch(I))
				continue;
			//the defines of this instruction I --> would be injectFaultCalls.   
			for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) 
			{    
				Instruction *v = dyn_cast<Instruction>(*i);
				if(!v)
					continue;
				//errs() <<"reached here:"<< *v << "\n"; 
				//do profiling for the def
				vector<const Type*> argTypes(1);
				argTypes[0] = Type::getInt32Ty(context);	// enum for the options
				FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 );
				Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType);			// get the injection function
				vector<Value*> countArgs(1);
				const IntegerType* itype = IntegerType::get(context,32);
				Value* defVal = ConstantInt::get(itype, DEF );
				countArgs[0] = defVal; //enum for branch
				Instruction *beforeInst;
				if(isa<PHINode>(v))
				{	BasicBlock *bb = v->getParent();
					beforeInst = bb->getFirstNonPHI(); 
				}
				else
					beforeInst = v;
				CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", beforeInst); 	// insert the profiling call before the def:v

			}
		}
		else if(profileoption == 'a' )
		{
			Instruction *beforeInst;
			//consider all instructions profiling
			vector<const Type*> argTypes(1);
			argTypes[0] = Type::getInt32Ty(context);	// enum for the options
			FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 );
			Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType);			// get the injection function
			vector<Value*> countArgs(1);
			const IntegerType* itype = IntegerType::get(context,32);
			Value* allVal = ConstantInt::get(itype, ALL );
			if(isa<PHINode>(I))
			{	BasicBlock *bb = I->getParent();
				beforeInst = bb->getFirstNonPHI(); 
			}
			else
				beforeInst = I;
			countArgs[0] = allVal; //enum for All inst
			CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", beforeInst); 	// Insert the inject call before the instruction
		}
		//in fact, here we only use backwardslice ('s')
		else if(profileoption == 's')
		{
			const Type* returnType = I->getType();
			if (returnType->isVoidTy() || !filter(I))//Here we can insert a new filter ///////////////////////////////////////////////
			{
				//errs()<<"filter not passed\n";
				continue;
			}
//for injection into all instructions except invoke instructions (these are same as unconditional branch instructions with exception handling mechanism)
		  if((isa<InvokeInst>(I)) 
#ifdef EXCLUDE_CASTINST
|| (isa<CastInst>(I))
#endif
) // cast instruction added by Jiesheng
        continue;
			
			//errs()<<"filter passed\n";
			Instruction *beforeInst;
			if(isa<PHINode>(I))
			{	BasicBlock *bb = I->getParent();
				beforeInst = bb->getFirstNonPHI(); 
			}
			else
				beforeInst = I;
			insert_worklist.push_back(beforeInst);
		}

    }

    while(!insert_worklist.empty())
    {
    	Instruction* beforeInst = insert_worklist.back();
    	insert_worklist.pop_back();
    	vector<const Type*> argTypes(1);
			argTypes[0] = Type::getInt32Ty(context);	// enum for the options
			FunctionType* countFuncType = FunctionType::get( Type::getVoidTy(context), argTypes, 0 );
    	Constant* countFunc = m->getOrInsertFunction("doProfiling", countFuncType);
    	vector<Value*> countArgs(1);
    		const IntegerType* itype = IntegerType::get(context,32);
			Value* allVal = ConstantInt::get(itype, BACKWARD_SLICE );
    		countArgs[0] = allVal; //enum for All inst

    	CallInst::Create( countFunc, countArgs.begin(),countArgs.end(), "", beforeInst);

    }
    return true;
}
示例#8
0
bool ObjCARCContract::runOnFunction(Function &F) {
  if (!EnableARCOpts)
    return false;

  // If nothing in the Module uses ARC, don't do anything.
  if (!Run)
    return false;

  Changed = false;
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());

  DenseMap<BasicBlock *, ColorVector> BlockColors;
  if (F.hasPersonalityFn() &&
      isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
    BlockColors = colorEHFunclets(F);

  LLVM_DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");

  // Track whether it's ok to mark objc_storeStrong calls with the "tail"
  // keyword. Be conservative if the function has variadic arguments.
  // It seems that functions which "return twice" are also unsafe for the
  // "tail" argument, because they are setjmp, which could need to
  // return to an earlier stack state.
  bool TailOkForStoreStrongs =
      !F.isVarArg() && !F.callsFunctionThatReturnsTwice();

  // For ObjC library calls which return their argument, replace uses of the
  // argument with uses of the call return value, if it dominates the use. This
  // reduces register pressure.
  SmallPtrSet<Instruction *, 4> DependingInstructions;
  SmallPtrSet<const BasicBlock *, 4> Visited;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
    Instruction *Inst = &*I++;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");

    // First try to peephole Inst. If there is nothing further we can do in
    // terms of undoing objc-arc-expand, process the next inst.
    if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
                                 TailOkForStoreStrongs, BlockColors))
      continue;

    // Otherwise, try to undo objc-arc-expand.

    // Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
    // and such; to do the replacement, the argument must have type i8*.

    // Function for replacing uses of Arg dominated by Inst.
    auto ReplaceArgUses = [Inst, this](Value *Arg) {
      // If we're compiling bugpointed code, don't get in trouble.
      if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
        return;

      // Look through the uses of the pointer.
      for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
           UI != UE; ) {
        // Increment UI now, because we may unlink its element.
        Use &U = *UI++;
        unsigned OperandNo = U.getOperandNo();

        // If the call's return value dominates a use of the call's argument
        // value, rewrite the use to use the return value. We check for
        // reachability here because an unreachable call is considered to
        // trivially dominate itself, which would lead us to rewriting its
        // argument in terms of its return value, which would lead to
        // infinite loops in GetArgRCIdentityRoot.
        if (!DT->isReachableFromEntry(U) || !DT->dominates(Inst, U))
          continue;

        Changed = true;
        Instruction *Replacement = Inst;
        Type *UseTy = U.get()->getType();
        if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
          // For PHI nodes, insert the bitcast in the predecessor block.
          unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
          BasicBlock *IncomingBB = PHI->getIncomingBlock(ValNo);
          if (Replacement->getType() != UseTy) {
            // A catchswitch is both a pad and a terminator, meaning a basic
            // block with a catchswitch has no insertion point. Keep going up
            // the dominator tree until we find a non-catchswitch.
            BasicBlock *InsertBB = IncomingBB;
            while (isa<CatchSwitchInst>(InsertBB->getFirstNonPHI())) {
              InsertBB = DT->getNode(InsertBB)->getIDom()->getBlock();
            }

            assert(DT->dominates(Inst, &InsertBB->back()) &&
                   "Invalid insertion point for bitcast");
            Replacement =
                new BitCastInst(Replacement, UseTy, "", &InsertBB->back());
          }

          // While we're here, rewrite all edges for this PHI, rather
          // than just one use at a time, to minimize the number of
          // bitcasts we emit.
          for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
            if (PHI->getIncomingBlock(i) == IncomingBB) {
              // Keep the UI iterator valid.
              if (UI != UE &&
                  &PHI->getOperandUse(
                      PHINode::getOperandNumForIncomingValue(i)) == &*UI)
                ++UI;
              PHI->setIncomingValue(i, Replacement);
            }
        } else {
          if (Replacement->getType() != UseTy)
            Replacement = new BitCastInst(Replacement, UseTy, "",
                                          cast<Instruction>(U.getUser()));
          U.set(Replacement);
        }
      }
    };


    Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
    Value *OrigArg = Arg;

    // TODO: Change this to a do-while.
    for (;;) {
      ReplaceArgUses(Arg);

      // If Arg is a no-op casted pointer, strip one level of casts and iterate.
      if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
        Arg = BI->getOperand(0);
      else if (isa<GEPOperator>(Arg) &&
               cast<GEPOperator>(Arg)->hasAllZeroIndices())
        Arg = cast<GEPOperator>(Arg)->getPointerOperand();
      else if (isa<GlobalAlias>(Arg) &&
               !cast<GlobalAlias>(Arg)->isInterposable())
        Arg = cast<GlobalAlias>(Arg)->getAliasee();
      else {
        // If Arg is a PHI node, get PHIs that are equivalent to it and replace
        // their uses.
        if (PHINode *PN = dyn_cast<PHINode>(Arg)) {
          SmallVector<Value *, 1> PHIList;
          getEquivalentPHIs(*PN, PHIList);
          for (Value *PHI : PHIList)
            ReplaceArgUses(PHI);
        }
        break;
      }
    }

    // Replace bitcast users of Arg that are dominated by Inst.
    SmallVector<BitCastInst *, 2> BitCastUsers;

    // Add all bitcast users of the function argument first.
    for (User *U : OrigArg->users())
      if (auto *BC = dyn_cast<BitCastInst>(U))
        BitCastUsers.push_back(BC);

    // Replace the bitcasts with the call return. Iterate until list is empty.
    while (!BitCastUsers.empty()) {
      auto *BC = BitCastUsers.pop_back_val();
      for (User *U : BC->users())
        if (auto *B = dyn_cast<BitCastInst>(U))
          BitCastUsers.push_back(B);

      ReplaceArgUses(BC);
    }
  }

  // If this function has no escaping allocas or suspicious vararg usage,
  // objc_storeStrong calls can be marked with the "tail" keyword.
  if (TailOkForStoreStrongs)
    for (CallInst *CI : StoreStrongCalls)
      CI->setTailCall();
  StoreStrongCalls.clear();

  return Changed;
}
示例#9
0
/// Create a clone of the blocks in a loop and connect them together.
/// If CreateRemainderLoop is false, loop structure will not be cloned,
/// otherwise a new loop will be created including all cloned blocks, and the
/// iterator of it switches to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
/// Return the new cloned loop that is created when CreateRemainderLoop is true.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
                const bool UseEpilogRemainder, const bool UnrollRemainder,
                BasicBlock *InsertTop,
                BasicBlock *InsertBot, BasicBlock *Preheader,
                std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
                ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *ParentLoop = L->getParentLoop();
  NewLoopsMap NewLoops;
  NewLoops[ParentLoop] = ParentLoop;
  if (!CreateRemainderLoop)
    NewLoops[L] = ParentLoop;

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
    NewBlocks.push_back(NewBB);

    // If we're unrolling the outermost loop, there's no remainder loop,
    // and this block isn't in a nested loop, then the new block is not
    // in any loop. Otherwise, add it to loopinfo.
    if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
      addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }

    if (DT) {
      if (Header == *BB) {
        // The header is dominated by the preheader.
        DT->addNewBlock(NewBB, InsertTop);
      } else {
        // Copy information from original loop to unrolled loop.
        BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
      }
    }

    if (Latch == *BB) {
      // For the last block, if CreateRemainderLoop is false, create a direct
      // jump to InsertBot. If not, create a loop back to cloned head.
      VMap.erase((*BB)->getTerminator());
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      IRBuilder<> Builder(LatchBR);
      if (!CreateRemainderLoop) {
        Builder.CreateBr(InsertBot);
      } else {
        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
                                          suffix + ".iter",
                                          FirstLoopBB->getFirstNonPHI());
        Value *IdxSub =
            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                              NewIdx->getName() + ".sub");
        Value *IdxCmp =
            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
        NewIdx->addIncoming(NewIter, InsertTop);
        NewIdx->addIncoming(IdxSub, NewBB);
      }
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (!CreateRemainderLoop) {
      if (UseEpilogRemainder) {
        unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
        NewPHI->setIncomingBlock(idx, InsertTop);
        NewPHI->removeIncomingValue(Latch, false);
      } else {
        VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
        cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
      }
    } else {
      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
      NewPHI->setIncomingBlock(idx, InsertTop);
      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
      idx = NewPHI->getBasicBlockIndex(Latch);
      Value *InVal = NewPHI->getIncomingValue(idx);
      NewPHI->setIncomingBlock(idx, NewLatch);
      if (Value *V = VMap.lookup(InVal))
        NewPHI->setIncomingValue(idx, V);
    }
  }
  if (CreateRemainderLoop) {
    Loop *NewLoop = NewLoops[L];
    MDNode *LoopID = NewLoop->getLoopID();
    assert(NewLoop && "L should have been cloned");

    // Only add loop metadata if the loop is not going to be completely
    // unrolled.
    if (UnrollRemainder)
      return NewLoop;

    Optional<MDNode *> NewLoopID = makeFollowupLoopID(
        LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
    if (NewLoopID.hasValue()) {
      NewLoop->setLoopID(NewLoopID.getValue());

      // Do not setLoopAlreadyUnrolled if loop attributes have been defined
      // explicitly.
      return NewLoop;
    }

    // Add unroll disable metadata to disable future unrolling for this loop.
    NewLoop->setLoopAlreadyUnrolled();
    return NewLoop;
  }
  else
    return nullptr;
}
示例#10
0
    Value* ModuloSchedulerDriverPass::copyLoopBodyToHeader(Instruction* inst,
            Instruction* induction, BasicBlock* header, int offset){

        // Holds the body of the interesting loop
        BasicBlock *body = inst->getParent();

        assert(header && "Header is null");
        assert(header->getTerminator() && "Header has no terminator");

        // Maps the old instructions to the new Instructions
        DenseMap<const Value *, Value *>  ValueMap;
        // Do the actual clone
        stringstream iname;
        iname<<"___"<<offset<<"___";
        BasicBlock* newBB = CloneBasicBlock(body, ValueMap, iname.str().c_str());

        // Fixing the dependencies for each of the instructions in the cloned BB
        // They now depend on themselves rather on the old cloned BB.
        for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
            for (Instruction::op_iterator ops = (it)->op_begin(); ops != (it)->op_end(); ++ops) {
                if (ValueMap.end() != ValueMap.find(*ops)) {
                    //*ops = ValueMap[*ops];
                    it->replaceUsesOfWith(*ops, ValueMap[*ops]);
                }
            }
        }

        // Fixing the PHI nodes since they are no longer needed
        for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
            if (PHINode *phi = dyn_cast<PHINode>(it)) {
                // Taking the preheader entryfrom the PHI node

                Value* prevalue = phi->getIncomingValue(phi->getBasicBlockIndex(header));
                assert(prevalue && "no prevalue. Don't know what to do");

                // If we are handling a PHI node which is the induction index ? A[PHI(i,0)] ?
                // If so, turn it into A[i + offset]
                if (ValueMap[induction] == phi) {
                    Instruction *add = subscripts::incrementValue(prevalue, offset);
                    //add->insertBefore(phi); This is the same as next line (compiles on LLVM2.1)
                    phi->getParent()->getInstList().insert(phi, add);
                    phi->replaceAllUsesWith(add);
                }  else {
                    // eliminating the PHI node all together
                    // This is just a regular variable or constant. No need to increment
                    // the index.
                    phi->replaceAllUsesWith(prevalue);
                }
            } 
        }

        // Move all non PHI and non terminator instructions into the header.
        while (!newBB->getFirstNonPHI()->isTerminator()) {
            Instruction* inst = newBB->getFirstNonPHI();
            if (dyn_cast<StoreInst>(inst)) {
                inst->eraseFromParent();
            } else {
                inst->moveBefore(header->getTerminator());
            }
        }
        newBB->dropAllReferences();
        return ValueMap[inst];
    }
示例#11
0
/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
/// their control flow to better facilitate subsequent optimization.
Instruction *
SimplifyHalfPowrLibCalls::
InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
                Instruction *InsertPt) {
  std::vector<BasicBlock *> Bodies;
  BasicBlock *NewBlock = 0;

  for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
    CallInst *Call = cast<CallInst>(HalfPowrs[i]);
    Function *Callee = Call->getCalledFunction();

    // Minimally sanity-check the CFG of half_powr to ensure that it contains
    // the kind of code we expect.  If we're running this pass, we have
    // reason to believe it will be what we expect.
    Function::iterator I = Callee->begin();
    BasicBlock *Prologue = I++;
    if (I == Callee->end()) break;
    BasicBlock *SubnormalHandling = I++;
    if (I == Callee->end()) break;
    BasicBlock *Body = I++;
    if (I != Callee->end()) break;
    if (SubnormalHandling->getSinglePredecessor() != Prologue)
      break;
    BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
    if (!PBI || !PBI->isConditional())
      break;
    BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
    if (!SNBI || SNBI->isConditional())
      break;
    if (!isa<ReturnInst>(Body->getTerminator()))
      break;

    Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));

    // Inline the call, taking care of what code ends up where.
    NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);

    InlineFunctionInfo IFI(0, TD);
    bool B = InlineFunction(Call, IFI);
    assert(B && "half_powr didn't inline?"); B=B;

    BasicBlock *NewBody = NewBlock->getSinglePredecessor();
    assert(NewBody);
    Bodies.push_back(NewBody);
  }

  if (!NewBlock)
    return InsertPt;

  // Put the code for all the bodies into one block, to facilitate
  // subsequent optimization.
  (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
  for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
    BasicBlock *Body = Bodies[i];
    Instruction *FNP = Body->getFirstNonPHI();
    // Splice the insts from body into NewBlock.
    NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
                                   FNP, Body->getTerminator());
  }

  return NewBlock->begin();
}
示例#12
0
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop.  We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
void LICM::PromoteAliasSet(AliasSet &AS) {
  // We can promote this alias set if it has a store, if it is a "Must" alias
  // set, if the pointer is loop invariant, and if we are not eliminating any
  // volatile loads or stores.
  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
    return;
  
  assert(!AS.empty() &&
         "Must alias set should have at least one pointer element in it!");
  Value *SomePtr = AS.begin()->getValue();

  // It isn't safe to promote a load/store from the loop if the load/store is
  // conditional.  For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  // 
  // It is safe to promote P if all uses are direct load/stores and if at
  // least one is guaranteed to be executed.
  bool GuaranteedToExecute = false;
  
  SmallVector<Instruction*, 64> LoopUses;
  SmallPtrSet<Value*, 4> PointerMustAliases;

  // Check that all of the pointers in the alias set have the same type.  We
  // cannot (yet) promote a memory location that is loaded and stored in
  // different sizes.
  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
    Value *ASIV = ASI->getValue();
    PointerMustAliases.insert(ASIV);
    
    // Check that all of the pointers in the alias set have the same type.  We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return;
    
    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
         UI != UE; ++UI) {
      // Ignore instructions that are outside the loop.
      Instruction *Use = dyn_cast<Instruction>(*UI);
      if (!Use || !CurLoop->contains(Use))
        continue;
      
      // If there is an non-load/store instruction in the loop, we can't promote
      // it.
      if (isa<LoadInst>(Use))
        assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
      else if (isa<StoreInst>(Use)) {
        assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
        if (Use->getOperand(0) == ASIV) return;
      } else
        return; // Not a load or store.
      
      if (!GuaranteedToExecute)
        GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
      
      LoopUses.push_back(Use);
    }
  }
  
  // If there isn't a guaranteed-to-execute instruction, we can't promote.
  if (!GuaranteedToExecute)
    return;
  
  // Otherwise, this is safe to promote, lets do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');  
  Changed = true;
  ++NumPromoted;

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode*, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  
  // It wants to know some value of the same type as what we'll be inserting.
  Value *SomeValue;
  if (isa<LoadInst>(LoopUses[0]))
    SomeValue = LoopUses[0];
  else
    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
  SSA.Initialize(SomeValue->getType(), SomeValue->getName());

  // First step: bucket up uses of the pointers by the block they occur in.
  // This is important because we have to handle multiple defs/uses in a block
  // ourselves: SSAUpdater is purely for cross-block references.
  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    UsesByBlock[User->getParent()].push_back(User);
  }
  
  // Okay, now we can iterate over all the blocks in the loop with uses,
  // processing them.  Keep track of which loads are loading a live-in value.
  SmallVector<LoadInst*, 32> LiveInLoads;
  DenseMap<Value*, Value*> ReplacedLoads;
  
  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
    Instruction *User = LoopUses[LoopUse];
    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
    
    // If this block has already been processed, ignore this repeat use.
    if (BlockUses.empty()) continue;
    
    // Okay, this is the first use in the block.  If this block just has a
    // single user in it, we can rewrite it trivially.
    if (BlockUses.size() == 1) {
      // If it is a store, it is a trivial def of the value in the block.
      if (isa<StoreInst>(User)) {
        SSA.AddAvailableValue(User->getParent(),
                              cast<StoreInst>(User)->getOperand(0));
      } else {
        // Otherwise it is a load, queue it to rewrite as a live-in load.
        LiveInLoads.push_back(cast<LoadInst>(User));
      }
      BlockUses.clear();
      continue;
    }
    
    // Otherwise, check to see if this block is all loads.  If so, we can queue
    // them all as live in loads.
    bool HasStore = false;
    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
      if (isa<StoreInst>(BlockUses[i])) {
        HasStore = true;
        break;
      }
    }
    
    if (!HasStore) {
      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
      BlockUses.clear();
      continue;
    }

    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
    // Since SSAUpdater is purely for cross-block values, we need to determine
    // the order of these instructions in the block.  If the first use in the
    // block is a load, then it uses the live in value.  The last store defines
    // the live out value.  We handle this by doing a linear scan of the block.
    BasicBlock *BB = User->getParent();
    Value *StoredValue = 0;
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
        // If this is a load from an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(L->getOperand(0))) continue;

        // If we haven't seen a store yet, this is a live in use, otherwise
        // use the stored value.
        if (StoredValue) {
          L->replaceAllUsesWith(StoredValue);
          ReplacedLoads[L] = StoredValue;
        } else {
          LiveInLoads.push_back(L);
        }
        continue;
      }
      
      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
        // If this is a store to an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(S->getOperand(1))) continue;

        // Remember that this is the active value in the block.
        StoredValue = S->getOperand(0);
      }
    }
    
    // The last stored value that happened is the live-out for the block.
    assert(StoredValue && "Already checked that there is a store in block");
    SSA.AddAvailableValue(BB, StoredValue);
    BlockUses.clear();
  }
  
  // Now that all the intra-loop values are classified, set up the preheader.
  // It gets a load of the pointer we're promoting, and it is the live-out value
  // from the preheader.
  LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
                                         Preheader->getTerminator());
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Now that the preheader is good to go, set up the exit blocks.  Each exit
  // block gets a store of the live-out values that feed them.  Since we've
  // already told the SSA updater about the defs in the loop and the preheader
  // definition, it is all set and we can start using it.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBlock = ExitBlocks[i];
    Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
    Instruction *InsertPos = ExitBlock->getFirstNonPHI();
    new StoreInst(LiveInValue, SomePtr, InsertPos);
  }

  // Okay, now we rewrite all loads that use live-in values in the loop,
  // inserting PHI nodes as necessary.
  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
    LoadInst *ALoad = LiveInLoads[i];
    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
    ALoad->replaceAllUsesWith(NewVal);
    CurAST->copyValue(ALoad, NewVal);
    ReplacedLoads[ALoad] = NewVal;
  }
  
  // If the preheader load is itself a pointer, we need to tell alias analysis
  // about the new pointer we created in the preheader block and about any PHI
  // nodes that just got inserted.
  if (PreheaderLoad->getType()->isPointerTy()) {
    // Copy any value stored to or loaded from a must-alias of the pointer.
    CurAST->copyValue(SomeValue, PreheaderLoad);
    
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      CurAST->copyValue(SomeValue, NewPHIs[i]);
  }
  
  // Now that everything is rewritten, delete the old instructions from the body
  // of the loop.  They should all be dead now.
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    
    // If this is a load that still has uses, then the load must have been added
    // as a live value in the SSAUpdate data structure for a block (e.g. because
    // the loaded value was stored later).  In this case, we need to recursively
    // propagate the updates until we get to the real value.
    if (!User->use_empty()) {
      Value *NewVal = ReplacedLoads[User];
      assert(NewVal && "not a replaced load?");
      
      // Propagate down to the ultimate replacee.  The intermediately loads
      // could theoretically already have been deleted, so we don't want to
      // dereference the Value*'s.
      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
      while (RLI != ReplacedLoads.end()) {
        NewVal = RLI->second;
        RLI = ReplacedLoads.find(NewVal);
      }
      
      User->replaceAllUsesWith(NewVal);
      CurAST->copyValue(User, NewVal);
    }
    
    CurAST->deleteValue(User);
    User->eraseFromParent();
  }
  
  // fwew, we're done!
}
示例#13
0
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
void LICM::sink(Instruction &I) {
  DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  if (isa<LoadInst>(I)) ++NumMovedLoads;
  else if (isa<CallInst>(I)) ++NumMovedCalls;
  ++NumSunk;
  Changed = true;

  // The case where there is only a single exit node of this loop is common
  // enough that we handle it as a special (more efficient) case.  It is more
  // efficient to handle because there are no PHI nodes that need to be placed.
  if (ExitBlocks.size() == 1) {
    if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
      // Instruction is not used, just delete it.
      CurAST->deleteValue(&I);
      // If I has users in unreachable blocks, eliminate.
      // If I is not void type then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust itself.
      if (!I.use_empty())
        I.replaceAllUsesWith(UndefValue::get(I.getType()));
      I.eraseFromParent();
    } else {
      // Move the instruction to the start of the exit block, after any PHI
      // nodes in it.
      I.moveBefore(ExitBlocks[0]->getFirstNonPHI());

      // This instruction is no longer in the AST for the current loop, because
      // we just sunk it out of the loop.  If we just sunk it into an outer
      // loop, we will rediscover the operation when we process it.
      CurAST->deleteValue(&I);
    }
    return;
  }
  
  if (ExitBlocks.empty()) {
    // The instruction is actually dead if there ARE NO exit blocks.
    CurAST->deleteValue(&I);
    // If I has users in unreachable blocks, eliminate.
    // If I is not void type then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!I.use_empty())
      I.replaceAllUsesWith(UndefValue::get(I.getType()));
    I.eraseFromParent();
    return;
  }
  
  // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
  // hard work of inserting PHI nodes as necessary.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  
  if (!I.use_empty())
    SSA.Initialize(I.getType(), I.getName());
  
  // Insert a copy of the instruction in each exit block of the loop that is
  // dominated by the instruction.  Each exit block is known to only be in the
  // ExitBlocks list once.
  BasicBlock *InstOrigBB = I.getParent();
  unsigned NumInserted = 0;
  
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBlock = ExitBlocks[i];
    
    if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
      continue;
    
    // Insert the code after the last PHI node.
    BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
    
    // If this is the first exit block processed, just move the original
    // instruction, otherwise clone the original instruction and insert
    // the copy.
    Instruction *New;
    if (NumInserted++ == 0) {
      I.moveBefore(InsertPt);
      New = &I;
    } else {
      New = I.clone();
      if (!I.getName().empty())
        New->setName(I.getName()+".le");
      ExitBlock->getInstList().insert(InsertPt, New);
    }
    
    // Now that we have inserted the instruction, inform SSAUpdater.
    if (!I.use_empty())
      SSA.AddAvailableValue(ExitBlock, New);
  }
  
  // If the instruction doesn't dominate any exit blocks, it must be dead.
  if (NumInserted == 0) {
    CurAST->deleteValue(&I);
    if (!I.use_empty())
      I.replaceAllUsesWith(UndefValue::get(I.getType()));
    I.eraseFromParent();
    return;
  }
  
  // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
  for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
    // Grab the use before incrementing the iterator.
    Use &U = UI.getUse();
    // Increment the iterator before removing the use from the list.
    ++UI;
    SSA.RewriteUseAfterInsertions(U);
  }
  
  // Update CurAST for NewPHIs if I had pointer type.
  if (I.getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      CurAST->copyValue(&I, NewPHIs[i]);
  
  // Finally, remove the instruction from CurAST.  It is no longer in the loop.
  CurAST->deleteValue(&I);
}
示例#14
0
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure non of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}
示例#15
0
       virtual bool runOnModule(Module &M)
       {
               LLVMContext &C = M.getContext();

               //errs() << "Pass called sucessfully!\n";
               std::vector<BranchInst*> vCondBranch;

               //Function *pi = (Function*)M.getOrInsertFunction("piInt", Type::getInt32Ty(C), Type::getInt32Ty(C), NULL);
               //BasicBlock* piBlock = BasicBlock::Create(C, "piFuncBlock", pi);
               //IRBuilder<> builder(piBlock);
               //builder.CreateRet(pi->arg_begin());

               //first go through and grab all of the conditional branches
               for(Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
               {
                       for(Function::iterator b = MI->begin(), end = MI->end(); b != end; ++b)
                       {
                               //leave func defs alone
                               if (!MI->isDeclaration())
                               {
                                       if(BranchInst *bi = dyn_cast<BranchInst>(b->getTerminator()))
                                       {
                                               if(bi->isConditional())
                                               {
                                                       //errs() << "Have a conditional branch!\n";
                                                       vCondBranch.push_back(bi);
                                               }
                                       }
                               }
                       }
               }
               for(std::vector<BranchInst*>::iterator i = vCondBranch.begin(), e = vCondBranch.end(); i != e; ++i)
               {
                       DEBUG_PRINT("Examining a conditional branch!");
                       Value *ops[2];
                       if(CmpInst *cmp = dyn_cast<CmpInst>((*i)->getCondition()))
                       {
               if (cmp->getNumOperands() < 2)
                       continue;
               if (!cmp->isIntPredicate())
                   continue;

                               ops[0] = cmp->getOperand(0);
                               ops[1] = cmp->getOperand(1);

                               //for(unsigned int x = 0; x < (*i)->getNumSuccessors(); ++x)
                               //{
                                       BasicBlock* trueBlock = (*i)->getSuccessor(0);
                                       BasicBlock* falseBlock = (*i)->getSuccessor(1);

                                       //BasicBlock* curr = (*i)->getSuccessor(x);
                                       IRBuilder<> builder(trueBlock->getFirstNonPHI());
                                       //builder.SetInsertPoint(curr->getFirstNonPHI());
                                       if(!isa<Constant>(ops[0]))
                                       {
                                               if(isa<LoadInst>(ops[0]))
                                               {
                                                       pred_iterator PI = pred_begin(trueBlock);
                                                       BasicBlock* Pred = *PI;

                                                       if(++PI != pred_end(trueBlock))
                                                               continue;

                                                       PHINode* pi;

                                                       pi = PHINode::Create(ops[0]->getType(), 1, "piFunc_t", trueBlock->begin());
                                                       pi->addIncoming(ops[0], Pred);

                                                       builder.SetInsertPoint(trueBlock->getFirstNonPHI());
                                               builder.CreateStore(pi, ((LoadInst*)ops[0])->getOperand(0));

                                                       PI = pred_begin(falseBlock);
                                                       Pred = *PI;

                                                       if(++PI != pred_end(falseBlock))
                                                               continue;

                                                       pi = PHINode::Create(ops[0]->getType(), 1, "piFunc_f", falseBlock->begin());
                                                       pi->addIncoming(ops[0], Pred);

                                                       builder.SetInsertPoint(falseBlock->getFirstNonPHI());
                                               builder.CreateStore(pi, ((LoadInst*)ops[0])->getOperand(0));


                                               }
                                       }
                                       if(!isa<Constant>(ops[1]))
                                       {
                                               if(isa<LoadInst>(ops[1]))
                                               {
                                                       pred_iterator PI = pred_begin(trueBlock);
                                                       BasicBlock* Pred = *PI;

                                                       if(++PI != pred_end(trueBlock))
                                                               continue;

                                                       PHINode* pi;


                                                       pi = PHINode::Create(ops[1]->getType(), 1, "piFunc_t", trueBlock->begin());
                                                       pi->addIncoming(ops[1], Pred);

                                                       builder.SetInsertPoint(trueBlock->getFirstNonPHI());
                                                       builder.CreateStore(pi, ((LoadInst*)ops[1])->getOperand(0));

                                                       PI = pred_begin(falseBlock);
                                                       Pred = *PI;

                                                       if(++PI != pred_end(falseBlock))
                                                               continue;

                                                       pi = PHINode::Create(ops[1]->getType(), 1, "piFunc_f", falseBlock->begin());
                                                       pi->addIncoming(ops[1], Pred);

                                                       builder.SetInsertPoint(falseBlock->getFirstNonPHI());
                                                       builder.CreateStore(pi, ((LoadInst*)ops[1])->getOperand(0));
                                               }
                                       }
                               //}
                       }
               }

               DEBUG_PRINT("Done!");

               return true;
       }
示例#16
0
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      bool UnrollRemainder, LoopInfo *LI,
                                      ScalarEvolution *SE, DominatorTree *DT,
                                      AssumptionCache *AC, bool PreserveLCSSA,
                                      Loop **ResultLoop) {
  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
  LLVM_DEBUG(L->dump());
  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
                                : dbgs() << "Using prolog remainder.\n");

  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm()) {
    LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
    return false;
  }

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();

  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());

  if (!LatchBR || LatchBR->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    LLVM_DEBUG(
        dbgs()
        << "Loop latch not terminated by a conditional branch.\n");
    return false;
  }

  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);

  if (L->contains(LatchExit)) {
    // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
    // targets of the Latch be an exit block out of the loop.
    LLVM_DEBUG(
        dbgs()
        << "One of the loop latch successors must be the exit block.\n");
    return false;
  }

  // These are exit blocks other than the target of the latch exiting block.
  SmallVector<BasicBlock *, 4> OtherExits;
  bool isMultiExitUnrollingEnabled =
      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                   UseEpilogRemainder) &&
      canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                       UseEpilogRemainder);
  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
  if (!isMultiExitUnrollingEnabled &&
      (!L->getExitingBlock() || OtherExits.size())) {
    LLVM_DEBUG(
        dbgs()
        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
           "enabled!\n");
    return false;
  }
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  // We calculate the backedge count by using getExitCount on the Latch block,
  // which is proven to be the only exiting block in this loop. This is same as
  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
  // exiting blocks).
  const SCEV *BECountSC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy()) {
    LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
    return false;
  }

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
    LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
    return false;
  }

  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
    LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
    return false;
  }

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth) {
    LLVM_DEBUG(
        dbgs()
        << "Count failed constraint on overflow trip count calculation.\n");
    return false;
  }

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // LatchExit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split LatchExit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
                                     nullptr, PreserveLCSSA);
    // NewExit gets its DebugLoc from LatchExit, which is not part of the
    // original Loop.
    // Fix this by setting Loop's DebugLoc to NewExit.
    auto *NewExitTerminator = NewExit->getTerminator();
    NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // LatchExit              LatchExit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is power of 2 we don't BECount for epilog case, however we'll
    // need it for a branch around unrolling loop for prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1)) :
                           B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  if (DT) {
    if (UseEpilogRemainder)
      DT->changeImmediateDominator(NewExit, PreHeader);
    else
      DT->changeImmediateDominator(PrologExit, PreHeader);
  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 remainder loop will have 1 iterations.
  // Do not create 1 iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  Loop *remainderLoop = CloneLoopBlocks(
      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
      InsertTop, InsertBot,
      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Now the loop blocks are cloned and the other exiting blocks from the
  // remainder are connected to the original Loop's exit blocks. The remaining
  // work is to update the phi nodes in the original loop, and take in the
  // values from the cloned region.
  for (auto *BB : OtherExits) {
   for (auto &II : *BB) {

     // Given we preserve LCSSA form, we know that the values used outside the
     // loop will be used through these phi nodes at the exit blocks that are
     // transformed below.
     if (!isa<PHINode>(II))
       break;
     PHINode *Phi = cast<PHINode>(&II);
     unsigned oldNumOperands = Phi->getNumIncomingValues();
     // Add the incoming values from the remainder code to the end of the phi
     // node.
     for (unsigned i =0; i < oldNumOperands; i++){
       Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
       // newVal can be a constant or derived from values outside the loop, and
       // hence need not have a VMap value. Also, since lookup already generated
       // a default "null" VMap entry for this value, we need to populate that
       // VMap entry correctly, with the mapped entry being itself.
       if (!newVal) {
         newVal = Phi->getIncomingValue(i);
         VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
       }
       Phi->addIncoming(newVal,
                           cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
     }
   }
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
    for (BasicBlock *SuccBB : successors(BB)) {
      assert(!(any_of(OtherExits,
                      [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
               SuccBB == LatchExit) &&
             "Breaks the definition of dedicated exits!");
    }
#endif
  }

  // Update the immediate dominator of the exit blocks and blocks that are
  // reachable from the exit blocks. This is needed because we now have paths
  // from both the original loop and the remainder code reaching the exit
  // blocks. While the IDom of these exit blocks were from the original loop,
  // now the IDom is the preheader (which decides whether the original loop or
  // remainder code should run).
  if (DT && !L->getExitingBlock()) {
    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
    // NB! We have to examine the dom children of all loop blocks, not just
    // those which are the IDom of the exit blocks. This is because blocks
    // reachable from the exit blocks can have their IDom as the nearest common
    // dominator of the exit blocks.
    for (auto *BB : L->blocks()) {
      auto *DomNodeBB = DT->getNode(BB);
      for (auto *DomChild : DomNodeBB->getChildren()) {
        auto *DomChildBB = DomChild->getBlock();
        if (!L->contains(LI->getLoopFor(DomChildBB)))
          ChildrenToUpdate.push_back(DomChildBB);
      }
    }
    for (auto *BB : ChildrenToUpdate)
      DT->changeImmediateDominator(BB, PreHeader);
  }

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // LatchExit              LatchExit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update counter in loop for unrolling.
    // I should be multiply of Count.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the any
  // of its parent loops, so the Scalar Evolution pass needs to be run again.
  SE->forgetTopmostLoop(L);

  // Verify that the Dom Tree is correct.
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
  if (DT)
    assert(DT->verify(DominatorTree::VerificationLevel::Full));
#endif

  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
  // cannot rely on the LoopUnrollPass to do this because it only does
  // canonicalization for parent/subloops and not the sibling loops.
  if (OtherExits.size() > 0) {
    // Generate dedicated exit blocks for the original loop, to preserve
    // LoopSimplifyForm.
    formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
    // Generate dedicated exit blocks for the remainder loop if one exists, to
    // preserve LoopSimplifyForm.
    if (remainderLoop)
      formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
  }

  auto UnrollResult = LoopUnrollResult::Unmodified;
  if (remainderLoop && UnrollRemainder) {
    LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
    UnrollResult =
        UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
                   /*Force*/ false, /*AllowRuntime*/ false,
                   /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
                   /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
                   /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
                   /*ORE*/ nullptr, PreserveLCSSA);
  }

  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
    *ResultLoop = remainderLoop;
  NumRuntimeUnrolled++;
  return true;
}
示例#17
0
Function* PartialInliner::unswitchFunction(Function* F) {
  // First, verify that this function is an unswitching candidate...
  BasicBlock* entryBlock = F->begin();
  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return 0;

  BasicBlock* returnBlock = 0;
  BasicBlock* nonReturnBlock = 0;
  unsigned returnCount = 0;
  for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
       SI != SE; ++SI)
    if (isa<ReturnInst>((*SI)->getTerminator())) {
      returnBlock = *SI;
      returnCount++;
    } else
      nonReturnBlock = *SI;

  if (returnCount != 1)
    return 0;

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  Function* duplicateFunction = CloneFunction(F, VMap,
                                              /*ModuleLevelChanges=*/false);
  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(duplicateFunction);
  BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
  BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
  BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);

  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(duplicateFunction);

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block.  For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock* preReturn = newReturnBlock;
  newReturnBlock = newReturnBlock->splitBasicBlock(
                                              newReturnBlock->getFirstNonPHI());
  BasicBlock::iterator I = preReturn->begin();
  BasicBlock::iterator Ins = newReturnBlock->begin();
  while (I != preReturn->end()) {
    PHINode* OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi) break;

    PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
    OldPhi->replaceAllUsesWith(retPhi);
    Ins = newReturnBlock->getFirstNonPHI();

    retPhi->addIncoming(I, preReturn);
    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
                        newEntryBlock);
    OldPhi->removeIncomingValue(newEntryBlock);

    ++I;
  }
  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock*> toExtract;
  toExtract.push_back(newNonReturnBlock);
  for (Function::iterator FI = duplicateFunction->begin(),
       FE = duplicateFunction->end(); FI != FE; ++FI)
    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
        &*FI != newNonReturnBlock)
      toExtract.push_back(FI);

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.runOnFunction(*duplicateFunction);

  // Extract the body of the if.
  Function* extractedFunction
    = CodeExtractor(toExtract, &DT).extractCodeRegion();

  InlineFunctionInfo IFI;

  // Inline the top-level if test into all callers.
  std::vector<User*> Users(duplicateFunction->use_begin(),
                           duplicateFunction->use_end());
  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
       UI != UE; ++UI)
    if (CallInst *CI = dyn_cast<CallInst>(*UI))
      InlineFunction(CI, IFI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
      InlineFunction(II, IFI);

  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  duplicateFunction->replaceAllUsesWith(F);
  duplicateFunction->eraseFromParent();

  ++NumPartialInlined;

  return extractedFunction;
}
示例#18
0
void LoopInterchangeTransform::splitOuterLoopLatch() {
  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
  BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch;
  OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock,
                              OuterLoopLatch->getFirstNonPHI(), DT, LI);
}