Beispiel #1
0
Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps,
    bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = nullptr;
  NewPointer = generateLocationAccessed(Stmt, Load, Pointer, ScalarMaps[Offset],
                                        GlobalMaps[Offset], VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
Beispiel #2
0
void WorklessInstrument::CreateIfElseBlock(Loop * pLoop, vector<BasicBlock *> & vecAdded)
{
	BasicBlock * pPreHeader = pLoop->getLoopPreheader();
	BasicBlock * pHeader = pLoop->getHeader();
	Function * pInnerFunction = pPreHeader->getParent();
	Module * pModule = pPreHeader->getParent()->getParent();

	BasicBlock * pElseBody = NULL;
	TerminatorInst * pTerminator = NULL;

	BranchInst * pBranch = NULL;
	LoadInst * pLoad1 = NULL;
	LoadInst * pLoad2 = NULL;
	LoadInst * pLoadnumGlobalCounter = NULL;
	BinaryOperator * pAddOne = NULL;
	StoreInst * pStoreNew = NULL;
	CmpInst * pCmp = NULL;
	CallInst * pCall = NULL;
	StoreInst * pStore = NULL;
	AttributeSet emptySet;

	pTerminator = pPreHeader->getTerminator();
	pLoadnumGlobalCounter = new LoadInst(this->numGlobalCounter, "", false, pTerminator);
	pLoadnumGlobalCounter->setAlignment(8);
	pAddOne = BinaryOperator::Create(Instruction::Add, pLoadnumGlobalCounter, this->ConstantLong1, "add", pTerminator);
	pStoreNew = new StoreInst(pAddOne, this->numGlobalCounter, false, pTerminator);
	pStoreNew->setAlignment(8);

	pElseBody = BasicBlock::Create(pModule->getContext(), ".else.body.CPI", pInnerFunction, 0);

	pLoad2 = new LoadInst(this->CURRENT_SAMPLE, "", false, pTerminator);
	pLoad2->setAlignment(8);
	pCmp = new ICmpInst(pTerminator, ICmpInst::ICMP_SLT, pAddOne, pLoad2, "");
	pBranch = BranchInst::Create(pHeader, pElseBody, pCmp );
	ReplaceInstWithInst(pTerminator, pBranch);

	pLoad1 = new LoadInst(this->SAMPLE_RATE, "", false, pElseBody);
	pCall = CallInst::Create(this->geo, pLoad1, "", pElseBody);
  	pCall->setCallingConv(CallingConv::C);
  	pCall->setTailCall(false);
  	pCall->setAttributes(emptySet);

  	CastInst * pCast = CastInst::CreateIntegerCast(pCall, this->LongType, true, "", pElseBody);
  	//pBinary = BinaryOperator::Create(Instruction::Add, pLoad2, pCast, "add", pIfBody);
  	pStore = new StoreInst(pCast, this->CURRENT_SAMPLE, false, pElseBody);
  	pStore->setAlignment(8);

  	pStore = new StoreInst(this->ConstantLong0, this->numGlobalCounter, false, pElseBody);
  	pStore->setAlignment(8);

  	pLoad1 = new LoadInst(this->numInstances, "", false, pElseBody);
  	pLoad1->setAlignment(8);
  	pAddOne = BinaryOperator::Create(Instruction::Add, pLoad1, this->ConstantLong1, "add", pElseBody);
	pStore = new StoreInst(pAddOne, this->numInstances, false, pElseBody);
	pStore->setAlignment(8);

	vecAdded.push_back(pPreHeader);
	vecAdded.push_back(pElseBody);
}
Beispiel #3
0
extern "C" LLVMValueRef
LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMValueRef Source, const char *Name,
                        LLVMAtomicOrdering Order, unsigned Alignment) {
  LoadInst *LI = new LoadInst(unwrap(Source), 0);
  LI->setAtomic(fromRust(Order));
  LI->setAlignment(Alignment);
  return wrap(unwrap(B)->Insert(LI, Name));
}
/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL
                        ? DL->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
               IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}
Beispiel #5
0
llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val)
{
   VectorType  *vectorType = VectorType::get(Type::FloatTy, 4);
   PointerType *vectorPtr  = PointerType::get(vectorType, 0);

   CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block);
   LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block);
   load->setAlignment(8);
   return load;
}
Beispiel #6
0
extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B,
                                            LLVMValueRef source,
                                            const char* Name,
                                            AtomicOrdering order) {
    LoadInst* li = new LoadInst(unwrap(source),0);
    li->setVolatile(true);
    li->setAtomic(order);
    li->setAlignment(sizeof(intptr_t));
    return wrap(unwrap(B)->Insert(li, Name));
}
Beispiel #7
0
extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B,
                                            LLVMValueRef source,
                                            const char* Name,
                                            AtomicOrdering order,
                                            unsigned alignment) {
    LoadInst* li = new LoadInst(unwrap(source),0);
    li->setAtomic(order);
    li->setAlignment(alignment);
    return wrap(unwrap(B)->Insert(li, Name));
}
Beispiel #8
0
LLVMValueRef 
mono_llvm_build_aligned_load (LLVMBuilderRef builder, LLVMValueRef PointerVal,
							  const char *Name, gboolean is_volatile, int alignment)
{
	LoadInst *ins;

	ins = unwrap(builder)->CreateLoad(unwrap(PointerVal), is_volatile, Name);
	ins->setAlignment (alignment);

	return wrap(ins);
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
                                                    BasicBlock &BB) {
  BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
  // Skip static allocas at the top of the entry block so they don't become
  // dynamic when we split the block.  If we used our optimized stack layout,
  // then there will only be one alloca and it will come first.
  for (; IP != BE; ++IP) {
    AllocaInst *AI = dyn_cast<AllocaInst>(IP);
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  bool IsEntryBB = &BB == &F.getEntryBlock();
  DebugLoc EntryLoc =
      IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc();
  IRBuilder<> IRB(IP);
  IRB.SetCurrentDebugLocation(EntryLoc);
  SmallVector<Value *, 1> Indices;
  Value *GuardP = IRB.CreateAdd(
      IRB.CreatePointerCast(GuardArray, IntptrTy),
      ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4));
  Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
  GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
  LoadInst *Load = IRB.CreateLoad(GuardP);
  Load->setAtomic(Monotonic);
  Load->setAlignment(4);
  Load->setMetadata(F.getParent()->getMDKindID("nosanitize"),
                    MDNode::get(*C, None));
  Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
  Instruction *Ins = SplitBlockAndInsertIfThen(
      Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
  IRB.SetInsertPoint(Ins);
  IRB.SetCurrentDebugLocation(EntryLoc);
  // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
  IRB.CreateCall(SanCovFunction, GuardP);
  IRB.CreateCall(EmptyAsm);  // Avoids callback merge.

  if (ClExperimentalTracing) {
    // Experimental support for tracing.
    // Insert a callback with the same guard variable as used for coverage.
    IRB.SetInsertPoint(IP);
    IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
  }
}
Beispiel #10
0
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
LLVMValueRef
mono_llvm_build_atomic_load (LLVMBuilderRef builder, LLVMValueRef PointerVal,
                             const char *Name, gboolean is_volatile, int alignment, BarrierKind barrier)
{
    LoadInst *ins = unwrap(builder)->CreateLoad(unwrap(PointerVal), is_volatile, Name);

    ins->setAlignment (alignment);
    switch (barrier) {
    case LLVM_BARRIER_NONE:
        break;
    case LLVM_BARRIER_ACQ:
        ins->setOrdering(Acquire);
        break;
    case LLVM_BARRIER_SEQ:
        ins->setOrdering(SequentiallyConsistent);
        break;
    default:
        g_assert_not_reached ();
        break;
    }

    return wrap(ins);
}
Beispiel #12
0
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
                                                    size_t Idx, bool UseCalls) {
  BasicBlock::iterator IP = BB.getFirstInsertionPt();
  bool IsEntryBB = &BB == &F.getEntryBlock();
  DebugLoc EntryLoc;
  if (IsEntryBB) {
    if (auto SP = F.getSubprogram())
      EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
    // Keep static allocas and llvm.localescape calls in the entry block.  Even
    // if we aren't splitting the block, it's nice for allocas to be before
    // calls.
    IP = PrepareToSplitEntryBlock(BB, IP);
  } else {
    EntryLoc = IP->getDebugLoc();
  }

  IRBuilder<> IRB(&*IP);
  IRB.SetCurrentDebugLocation(EntryLoc);
  if (Options.TracePC) {
    IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
    IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
  } else if (Options.TracePCGuard) {
    //auto GuardVar = new GlobalVariable(
    //   *F.getParent(), Int64Ty, false, GlobalVariable::LinkOnceODRLinkage,
    //    Constant::getNullValue(Int64Ty), "__sancov_guard." + F.getName());
    // if (auto Comdat = F.getComdat())
    //  GuardVar->setComdat(Comdat);
    // TODO: add debug into to GuardVar.
    // GuardVar->setSection(SanCovTracePCGuardSection);
    // auto GuardPtr = IRB.CreatePointerCast(GuardVar, IntptrPtrTy);
    auto GuardPtr = IRB.CreateIntToPtr(
        IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
                      ConstantInt::get(IntptrTy, Idx * 4)),
        Int32PtrTy);
    if (!UseCalls) {
      auto GuardLoad = IRB.CreateLoad(GuardPtr);
      GuardLoad->setAtomic(AtomicOrdering::Monotonic);
      GuardLoad->setAlignment(8);
      SetNoSanitizeMetadata(GuardLoad);  // Don't instrument with e.g. asan.
      auto Cmp = IRB.CreateICmpNE(
          GuardLoad, Constant::getNullValue(GuardLoad->getType()));
      auto Ins = SplitBlockAndInsertIfThen(
          Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
      IRB.SetCurrentDebugLocation(EntryLoc);
      IRB.SetInsertPoint(Ins);
    }
    IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
    IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
  } else {
    Value *GuardP = IRB.CreateAdd(
        IRB.CreatePointerCast(GuardArray, IntptrTy),
        ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
    GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
    if (Options.TraceBB) {
      IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
    } else if (UseCalls) {
      IRB.CreateCall(SanCovWithCheckFunction, GuardP);
    } else {
      LoadInst *Load = IRB.CreateLoad(GuardP);
      Load->setAtomic(AtomicOrdering::Monotonic);
      Load->setAlignment(4);
      SetNoSanitizeMetadata(Load);
      Value *Cmp =
          IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
      Instruction *Ins = SplitBlockAndInsertIfThen(
          Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
      IRB.SetInsertPoint(Ins);
      IRB.SetCurrentDebugLocation(EntryLoc);
      // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
      IRB.CreateCall(SanCovFunction, GuardP);
      IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
    }
  }

  if (Options.Use8bitCounters) {
    IRB.SetInsertPoint(&*IP);
    Value *P = IRB.CreateAdd(
        IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
        ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
    P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
    LoadInst *LI = IRB.CreateLoad(P);
    Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
    StoreInst *SI = IRB.CreateStore(Inc, P);
    SetNoSanitizeMetadata(LI);
    SetNoSanitizeMetadata(SI);
  }
}
Beispiel #13
0
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
            SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
            Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
                ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  std::map<Argument *, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  // We need to keep the original loads for each argument and the elements
  // of the argument that are accessed.
  std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (ByValArgsToTransform.count(&*I)) {
      // Simple byval argument? Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      StructType *STy = cast<StructType>(AgTy);
      Params.insert(Params.end(), STy->element_begin(), STy->element_end());
      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
                        AttributeSet());
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;

      // There may be remaining metadata uses of the argument for things like
      // llvm.dbg.value. Replace them with undef.
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
      for (User *U : I->users()) {
        Instruction *UI = cast<Instruction>(U);
        Type *SrcTy;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          SrcTy = L->getType();
        else
          SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
        IndicesVector Indices;
        Indices.reserve(UI->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(std::make_pair(SrcTy, Indices));
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis
          OrigLoad = cast<LoadInst>(UI->user_back());
        OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (const auto &ArgIndex : ArgIndices) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(
            cast<PointerType>(I->getType()->getScalarType())->getElementType(),
            ArgIndex.second));
        ArgAttrVec.push_back(AttributeSet());
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  // Patch the pointer to LLVM function in debug info descriptor.
  NF->setSubprogram(F->getSubprogram());
  F->setSubprogram(nullptr);

  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
               << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
                                       PAL.getRetAttributes(), ArgAttrVec));
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  while (!F->use_empty()) {
    CallSite CS(F->user_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttributeList &CallPAL = CS.getAttributes();

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo)
      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
      } else if (ByValArgsToTransform.count(&*I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(
              STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
          ArgAttrVec.push_back(AttributeSet());
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value *> Ops;
        for (const auto &ArgIndex : ArgIndices) {
          Value *V = *AI;
          LoadInst *OrigLoad =
              OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
          if (!ArgIndex.second.empty()) {
            Ops.reserve(ArgIndex.second.size());
            Type *ElTy = V->getType();
            for (auto II : ArgIndex.second) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              Type *IdxTy =
                  (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
                                      : Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, II));
              // Keep track of the type we're currently indexing.
              if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
                ElTy = ElPTy->getElementType();
              else
                ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
                                          V->getName() + ".idx", Call);
            Ops.clear();
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          // Transfer the AA info too.
          AAMDNodes AAInfo;
          OrigLoad->getAAMetadata(AAInfo);
          newLoad->setAAMetadata(AAInfo);

          Args.push_back(newLoad);
          ArgAttrVec.push_back(AttributeSet());
        }
      }

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CS.getOperandBundlesAsDefs(OpBundles);

    CallSite NewCS;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", Call);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
      NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS.setCallingConv(CS.getCallingConv());
    NewCS.setAttributes(
        AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
                           CallPAL.getRetAttributes(), ArgAttrVec));
    NewCS->setDebugLoc(Call->getDebugLoc());
    uint64_t W;
    if (Call->extractProfTotalWeight(W))
      NewCS->setProfWeight(W);
    Args.clear();
    ArgAttrVec.clear();

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CS, NewCS);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(NewCS.getInstruction());
      NewCS->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  const DataLayout &DL = F->getParent()->getDataLayout();

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
                              I2 = NF->arg_begin();
       I != E; ++I) {
    if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(&*I2);
      I2->takeName(&*I);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(&*I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = &NF->begin()->front();

      // Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                        I->getParamAlignment(), "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                        nullptr};

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = GetElementPtrInst::Create(
            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
            InsertPt);
        I2->setName(I->getName() + "." + Twine(i));
        new StoreInst(&*I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(&*I);

      // If the alloca is used in a call, we must clear the tail flag since
      // the callee now uses an alloca from the caller.
      for (User *U : TheAlloca->users()) {
        CallInst *Call = dyn_cast<CallInst>(U);
        if (!Call)
          continue;
        Call->setTailCall(false);
      }
      continue;
    }

    if (I->use_empty())
      continue;

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[&*I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
        assert(ArgIndices.begin()->second.empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName() + ".val");
        LI->replaceAllUsesWith(&*I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
                     << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             It->second != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
          NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
                     << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->user_back());
          L->replaceAllUsesWith(&*TheArg);
          L->eraseFromParent();
        }
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    std::advance(I2, ArgIndices.size());
  }

  return NF;
}
/// tryAggregating - When scanning forward over instructions, we look for
/// other loads or stores that could be aggregated with this one.
/// Returns the last instruction added (if one was added) since we might have
/// removed some loads or stores and that might invalidate an iterator.
Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr,
    bool DebugThis) {
  if (TD == 0) return 0;

  Module* M = StartInst->getParent()->getParent()->getParent();
  LLVMContext& Context = StartInst->getContext();

  Type* int8Ty = Type::getInt8Ty(Context);
  Type* sizeTy = Type::getInt64Ty(Context);
  Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace);
  bool isLoad = isa<LoadInst>(StartInst);
  bool isStore = isa<StoreInst>(StartInst);
  Instruction *lastAddedInsn = NULL;
  Instruction *LastLoadOrStore = NULL;
 
  SmallVector<Instruction*, 8> toRemove;

  // Okay, so we now have a single global load/store. Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemOpRanges Ranges(*TD);
 
  // Put the first store in since we want to preserve the order.
  Ranges.addInst(0, StartInst);

  BasicBlock::iterator BI = StartInst;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {

    if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) {
      // OK!
    } else {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory())
        break;
      if (isStore && BI->mayReadFromMemory())
        break;
      continue;
    }

    if ( isStore && isa<StoreInst>(BI) ) {
      StoreInst *NextStore = cast<StoreInst>(BI);
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
        break;

      Ranges.addStore(Offset, NextStore);
      LastLoadOrStore = NextStore;
    } else {
      LoadInst *NextLoad = cast<LoadInst>(BI);
      if (!NextLoad->isSimple()) break;

      // Check to see if this load is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD))
        break;

      Ranges.addLoad(Offset, NextLoad);
      LastLoadOrStore = NextLoad;
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (!Ranges.moreThanOneOp())
    return 0;

  // Divide the instructions between StartInst and LastLoadOrStore into
  // addressing, memops, and uses of memops (uses of loads)
  reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis);

  Instruction* insertBefore = StartInst;
  IRBuilder<> builder(insertBefore);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memcpy's for anything big enough to be worthwhile.
  for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemOpRange &Range = *I;
    Value* oldBaseI = NULL;
    Value* newBaseI = NULL;

    if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing...

    builder.SetInsertPoint(insertBefore);

    // Otherwise, we do want to transform this!  Create a new memcpy.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    if( DebugThis ) {
      errs() << "base is:";
      StartPtr->dump();
    }

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    Instruction *alloc = NULL;
    Value *globalPtr = NULL;

    // create temporary alloca space to communicate to/from.
    alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore,
                       Range.End-Range.Start, Alignment);

    // Generate the old and new base pointers before we output
    // anything else.
    {
      Type* iPtrTy = TD->getIntPtrType(alloc->getType());
      Type* iNewBaseTy = TD->getIntPtrType(alloc->getType());
      oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i");
      newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i");
    }

    // If storing, do the stores we had into our alloca'd region.
    if( isStore ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        StoreInst* oldStore = cast<StoreInst>(*SI);

        if( DebugThis ) {
          errs() << "have store in range:";
          oldStore->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // Old load must not be volatile or atomic... or we shouldn't have put
        // it in ranges
        assert(!(oldStore->isVolatile() || oldStore->isAtomic()));
        StoreInst* newStore =
          builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc);
        newStore->setAlignment(oldStore->getAlignment());
        newStore->takeName(oldStore);
      }
    }

    // cast the pointer that was load/stored to i8 if necessary.
    if( StartPtr->getType()->getPointerElementType() == int8Ty ) {
      globalPtr = StartPtr;
    } else {
      globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast");
    }

    // Get a Constant* for the length.
    Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false);

    // Now add the memcpy instruction
    unsigned addrSpaceDst,addrSpaceSrc;
    addrSpaceDst = addrSpaceSrc = 0;
    if( isStore ) addrSpaceDst = globalSpace;
    if( isLoad ) addrSpaceSrc = globalSpace;

    Type *types[3];
    types[0] = PointerType::get(int8Ty, addrSpaceDst);
    types[1] = PointerType::get(int8Ty, addrSpaceSrc);
    types[2] = sizeTy;

    Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types);

    Value* args[5]; // dst src len alignment isvolatile
    if( isStore ) {
      // it's a store (ie put)
      args[0] = globalPtr;
      args[1] = alloc;
    } else {
      // it's a load (ie get)
      args[0] = alloc;
      args[1] = globalPtr;
    }
    args[2] = len;
    // alignment
    args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false);
    // isvolatile
    args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false);

    Instruction* aMemCpy = builder.CreateCall(func, args);

    /*
    DEBUG(dbgs() << "Replace ops:\n";
      for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
        dbgs() << *Range.TheStores[i] << '\n';
      dbgs() << "With: " << *AMemSet << '\n');
      */

    if (!Range.TheStores.empty())
      aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    lastAddedInsn = aMemCpy;

    // If loading, load from the memcpy'd region
    if( isLoad ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        LoadInst* oldLoad = cast<LoadInst>(*SI);
        if( DebugThis ) {
          errs() << "have load in range:";
          oldLoad->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // Old load must not be volatile or atomic... or we shouldn't have put
        // it in ranges
        assert(!(oldLoad->isVolatile() || oldLoad->isAtomic()));
        LoadInst* newLoad = builder.CreateLoad(ptrToAlloc);
        newLoad->setAlignment(oldLoad->getAlignment());
        oldLoad->replaceAllUsesWith(newLoad);
        newLoad->takeName(oldLoad);
        lastAddedInsn = newLoad;
      }
    }

    // Save old loads/stores for removal
    for (SmallVector<Instruction*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI) {
      Instruction* insn = *SI;
      toRemove.push_back(insn);
    }
  }

  // Zap all the old loads/stores
  for (SmallVector<Instruction*, 16>::const_iterator
       SI = toRemove.begin(),
       SE = toRemove.end(); SI != SE; ++SI) {
    (*SI)->eraseFromParent();
  }

  return lastAddedInsn;
}
Beispiel #15
0
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 
// upgraded intrinsic. All argument and return casting must be provided in 
// order to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  ImmutableCallSite CS(CI);

  assert(F && "CallInst has no function associated with it.");

  if (!NewFn) {
    if (F->getName() == "llvm.x86.sse.loadu.ps" ||
        F->getName() == "llvm.x86.sse2.loadu.dq" ||
        F->getName() == "llvm.x86.sse2.loadu.pd") {
      // Convert to a native, unaligned load.
      const Type *VecTy = CI->getType();
      const Type *IntTy = IntegerType::get(C, 128);
      IRBuilder<> Builder(C);
      Builder.SetInsertPoint(CI->getParent(), CI);

      Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
                                        PointerType::getUnqual(IntTy),
                                        "cast");
      LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
      LI->setAlignment(1);      // Unaligned load.
      BC = Builder.CreateBitCast(LI, VecTy, "new.cast");

      // Fix up all the uses with our new load.
      if (!CI->use_empty())
        CI->replaceAllUsesWith(BC);

      // Remove intrinsic.
      CI->eraseFromParent();
    } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
               F->getName() == "llvm.x86.sse2.movnt.dq" ||
               F->getName() == "llvm.x86.sse2.movnt.pd" ||
               F->getName() == "llvm.x86.sse2.movnt.i") {
      IRBuilder<> Builder(C);
      Builder.SetInsertPoint(CI->getParent(), CI);

      Module *M = F->getParent();
      SmallVector<Value *, 1> Elts;
      Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateStore(Arg1, BC);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);
      SI->setAlignment(16);

      // Remove intrinsic.
      CI->eraseFromParent();
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }
    return;
  }

  switch (NewFn->getIntrinsicID()) {
  case Intrinsic::prefetch: {
    IRBuilder<> Builder(C);
    Builder.SetInsertPoint(CI->getParent(), CI);
    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());

    // Add the extra "data cache" argument
    Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
                           CI->getArgOperand(2),
                           llvm::ConstantInt::get(I32Ty, 1) };
    CallInst *NewCI = CallInst::Create(NewFn, Operands,
                                       CI->getName(), CI);
    NewCI->setTailCall(CI->isTailCall());
    NewCI->setCallingConv(CI->getCallingConv());
    //  Handle any uses of the old CallInst.
    if (!CI->use_empty())
      //  Replace all uses of the old call with the new cast which has the
      //  correct type.
      CI->replaceAllUsesWith(NewCI);

    //  Clean up the old call now that it has been completely upgraded.
    CI->eraseFromParent();
    break;
  }
  }
}
Beispiel #16
0
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
CallGraphNode *ArgPromotion::DoPromotion(Function *F,
                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
                              SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {

  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  const FunctionType *FTy = F->getFunctionType();
  std::vector<const Type*> Params;

  typedef std::set<IndicesVector> ScalarizeTable;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  //
  std::map<Argument*, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  std::map<IndicesVector, LoadInst*> OriginalLoads;

  // Attributes - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeWithIndex, 8> AttributesVec;
  const AttrListPtr &PAL = F->getAttributes();

  // Add any return attributes.
  if (Attributes attrs = PAL.getRetAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

  // First, determine the new argument list
  unsigned ArgIndex = 1;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgIndex) {
    if (ByValArgsToTransform.count(I)) {
      // Simple byval argument? Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      const StructType *STy = cast<StructType>(AgTy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Params.push_back(STy->getElementType(i));
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[I];
      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
           ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
        IndicesVector Indices;
        Indices.reserve(User->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(Indices);
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(User))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis
          OrigLoad = cast<LoadInst>(User->use_back());
        OriginalLoads[Indices] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (ScalarizeTable::iterator SI = ArgIndices.begin(),
             E = ArgIndices.end(); SI != E; ++SI) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
                                                           SI->begin(),
                                                           SI->end()));
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  // Add any function attributes.
  if (Attributes attrs = PAL.getFnAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

  const Type *RetTy = FTy->getReturnType();

  // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
  // have zero fixed arguments.
  bool ExtraArgHack = false;
  if (Params.empty() && FTy->isVarArg()) {
    ExtraArgHack = true;
    Params.push_back(Type::getInt32Ty(F->getContext()));
  }

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  
  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
        << "From: " << *F);
  
  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                     AttributesVec.end()));
  AttributesVec.clear();

  F->getParent()->getFunctionList().insert(F, NF);
  NF->takeName(F);

  // Get the alias analysis information that we need to update to reflect our
  // changes.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraph>();
  
  // Get a new callgraph node for NF.
  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
  

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value*, 16> Args;
  while (!F->use_empty()) {
    CallSite CS = CallSite::get(F->use_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttrListPtr &CallPAL = CS.getAttributes();

    // Add any return attributes.
    if (Attributes attrs = CallPAL.getRetAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgIndex = 1;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I, ++AI, ++ArgIndex)
      if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
        Args.push_back(*AI);          // Unmodified argument

        if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
          AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));

      } else if (ByValArgsToTransform.count(I)) {
        // Emit a GEP and load for each element of the struct.
        const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        const StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
                                                 (*AI)->getName()+"."+utostr(i),
                                                 Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value*> Ops;
        for (ScalarizeTable::iterator SI = ArgIndices.begin(),
               E = ArgIndices.end(); SI != E; ++SI) {
          Value *V = *AI;
          LoadInst *OrigLoad = OriginalLoads[*SI];
          if (!SI->empty()) {
            Ops.reserve(SI->size());
            const Type *ElTy = V->getType();
            for (IndicesVector::const_iterator II = SI->begin(),
                 IE = SI->end(); II != IE; ++II) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              const Type *IdxTy = (ElTy->isStructTy() ?
                    Type::getInt32Ty(F->getContext()) : 
                    Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, *II));
              // Keep track of the type we're currently indexing.
              ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
                                          V->getName()+".idx", Call);
            Ops.clear();
            AA.copyValue(OrigLoad->getOperand(0), V);
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          Args.push_back(newLoad);
          AA.copyValue(OrigLoad, Args.back());
        }
      }

    if (ExtraArgHack)
      Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
      Args.push_back(*AI);
      if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
    }

    // Add any function attributes.
    if (Attributes attrs = CallPAL.getFnAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

    Instruction *New;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                               Args.begin(), Args.end(), "", Call);
      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
      cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                          AttributesVec.end()));
    } else {
      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
      cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                        AttributesVec.end()));
      if (cast<CallInst>(Call)->isTailCall())
        cast<CallInst>(New)->setTailCall();
    }
    Args.clear();
    AttributesVec.clear();

    // Update the alias analysis implementation to know that we are replacing
    // the old call with a new one.
    AA.replaceWithNewValue(Call, New);

    // Update the callgraph to know that the callsite has been transformed.
    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
    CalleeNode->replaceCallEdge(Call, New, NF_CGN);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(New);
      New->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transfering uses of the old arguments over to
  // the new arguments, also transfering over the names as well.
  //
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
       I2 = NF->arg_begin(); I != E; ++I) {
    if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(I2);
      I2->takeName(I);
      AA.replaceWithNewValue(I, I2);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = NF->begin()->begin();

      // Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
      const StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = 
          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
                                    TheAlloca->getName()+"."+Twine(i), 
                                    InsertPt);
        I2->setName(I->getName()+"."+Twine(i));
        new StoreInst(I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(I);
      AA.replaceWithNewValue(I, TheAlloca);
      continue;
    }

    if (I->use_empty()) {
      AA.deleteValue(I);
      continue;
    }

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
        assert(ArgIndices.begin()->empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName()+".val");
        LI->replaceAllUsesWith(I2);
        AA.replaceWithNewValue(LI, I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
              << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             *It != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
            NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
              << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->use_back());
          L->replaceAllUsesWith(TheArg);
          AA.replaceWithNewValue(L, TheArg);
          L->eraseFromParent();
        }
        AA.deleteValue(GEP);
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
      ++I2;
  }

  // Notify the alias analysis implementation that we inserted a new argument.
  if (ExtraArgHack)
    AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), 
                 NF->arg_begin());


  // Tell the alias analysis that the old function is about to disappear.
  AA.replaceWithNewValue(F, NF);

  
  NF_CGN->stealCalledFunctionsFrom(CG[F]);
  
  // Now that the old function is dead, delete it.  If there is a dangling
  // reference to the CallgraphNode, just leave the dead function around for
  // someone else to nuke.
  CallGraphNode *CGN = CG[F];
  if (CGN->getNumReferences() == 0)
    delete CG.removeFunctionFromModule(CGN);
  else
    F->setLinkage(Function::ExternalLinkage);
  
  return NF_CGN;
}
Beispiel #17
0
void WorklessInstrument::InstrumentWorkless0Star1(Module * pModule, Loop * pLoop)
{
	Function * pMain = NULL;

	if(strMainName != "" )
	{
		pMain = pModule->getFunction(strMainName.c_str());
	}
	else
	{
		pMain = pModule->getFunction("main");
	}

	LoadInst * pLoad;
	BinaryOperator* pAdd = NULL;
	StoreInst * pStore = NULL;

	for (Function::iterator BB = pMain->begin(); BB != pMain->end(); ++BB) 
	{
		if(BB->getName().equals("entry"))
		{
			CallInst * pCall;
			StoreInst * pStore;

			Instruction * II = BB->begin();
			pCall = CallInst::Create(this->InitHooks, "", II);
			pCall->setCallingConv(CallingConv::C);
			pCall->setTailCall(false);
			AttributeSet emptySet;
			pCall->setAttributes(emptySet);

			pCall = CallInst::Create(this->getenv, this->SAMPLE_RATE_ptr, "", II);
			pCall->setCallingConv(CallingConv::C);
			pCall->setTailCall(false);
			AttributeSet AS;
			{
				SmallVector<AttributeSet, 4> Attrs;
				AttributeSet PAS;
				{
					AttrBuilder B;
					B.addAttribute(Attribute::NoUnwind);
					PAS = AttributeSet::get(pModule->getContext(), ~0U, B);
				}
				Attrs.push_back(PAS);
				AS = AttributeSet::get(pModule->getContext(), Attrs);
			}
			pCall->setAttributes(AS);

			pCall = CallInst::Create(this->function_atoi, pCall, "", II);
			pCall->setCallingConv(CallingConv::C);
			pCall->setTailCall(false);
			{
  				SmallVector<AttributeSet, 4> Attrs;
   				AttributeSet PAS;
    			{
    	 			AttrBuilder B;
     				B.addAttribute(Attribute::NoUnwind);
     				B.addAttribute(Attribute::ReadOnly);
     				PAS = AttributeSet::get(pModule->getContext(), ~0U, B);
    			}
   
   				Attrs.push_back(PAS);
   				AS = AttributeSet::get(pModule->getContext(), Attrs);
   
  			}
  			pCall->setAttributes(AS);

  			pStore = new StoreInst(pCall, this->SAMPLE_RATE, false, II);
  			pStore->setAlignment(4);

  			pCall = CallInst::Create(this->geo, pCall, "", II);
  			pCall->setCallingConv(CallingConv::C);
  			pCall->setTailCall(false);
  			pCall->setAttributes(emptySet);

  			CastInst * pCast = CastInst::CreateIntegerCast(pCall, this->LongType, true, "", II);
  			pStore = new StoreInst(pCast, this->CURRENT_SAMPLE, false, II);
  			pStore->setAlignment(8);

  			vector<Value *> vecParam;
  			vecParam.push_back(this->Output_Format_String);
  			vecParam.push_back(pCall);
  			pCall = CallInst::Create(this->printf, vecParam, "", II);
  			pCall->setCallingConv(CallingConv::C);
  			pCall->setTailCall(false);
  			pCall->setAttributes(emptySet);
  			break;
		}
	}

	for (Function::iterator BB = pMain->begin(); BB != pMain->end(); ++BB) 
	{		
		for (BasicBlock::iterator Ins = BB->begin(); Ins != BB->end(); ++Ins) 
		{
			if (isa<ReturnInst>(Ins) || isa<ResumeInst>(Ins)) 
			{
				vector<Value*> vecParams;
				pLoad = new LoadInst(numIterations, "", false, Ins); 
				pLoad->setAlignment(8); 
				vecParams.push_back(pLoad);
				pLoad = new LoadInst(numInstances, "", false, Ins); 
				pLoad->setAlignment(8);
				vecParams.push_back(pLoad);
				
				CallInst* pCall = CallInst::Create(this->PrintLoopInfo, vecParams, "", Ins);
				pCall->setCallingConv(CallingConv::C);
				pCall->setTailCall(false);
				AttributeSet aSet;
				pCall->setAttributes(aSet);
			}
			else if(isa<CallInst>(Ins) || isa<InvokeInst>(Ins))
			{
				CallSite cs(Ins);
				Function * pCalled = cs.getCalledFunction();

				if(pCalled == NULL)
				{
					continue;
				}

				if(pCalled->getName() == "exit" || pCalled->getName() == "_ZL9mysql_endi")
				{
					vector<Value*> vecParams;
					pLoad = new LoadInst(numIterations, "", false, Ins); 
					pLoad->setAlignment(8); 
					vecParams.push_back(pLoad);
					pLoad = new LoadInst(numInstances, "", false, Ins); 
					pLoad->setAlignment(8);
					vecParams.push_back(pLoad);
				
					CallInst* pCall = CallInst::Create(this->PrintLoopInfo, vecParams, "", Ins);
					pCall->setCallingConv(CallingConv::C);
					pCall->setTailCall(false);
					AttributeSet aSet;
					pCall->setAttributes(aSet);
				}
			}
		}
	}



	BasicBlock * pHeader = pLoop->getHeader();
	set<BasicBlock *> setExitBlock;
	CollectExitBlock(pLoop, setExitBlock);

	vector<BasicBlock *> vecAdded;
	CreateIfElseBlock(pLoop, vecAdded);

	ValueToValueMapTy  VMap;
	set<BasicBlock *> setCloned;
	CloneInnerLoop(pLoop, vecAdded, VMap, setCloned);

	BasicBlock * pPreHeader = vecAdded[1];
	pLoad = new LoadInst(this->numIterations, "", false, pPreHeader->getTerminator());
	pLoad->setAlignment(8);

	BasicBlock * pClonedHeader = cast<BasicBlock>(VMap[pHeader]);

	set<BasicBlock *> setPredBlocks;

	for(pred_iterator PI = pred_begin(pClonedHeader), E = pred_end(pClonedHeader); PI != E; ++PI)
	{
		setPredBlocks.insert(*PI);
	}

	PHINode * pNew = PHINode::Create(pLoad->getType(), setPredBlocks.size(), "numIterations", pClonedHeader->getFirstInsertionPt());
	pAdd = BinaryOperator::Create(Instruction::Add, pNew, this->ConstantLong1, "add", pClonedHeader->getFirstInsertionPt());

	set<BasicBlock *>::iterator itSetBegin = setPredBlocks.begin();
	set<BasicBlock *>::iterator itSetEnd   = setPredBlocks.end();

	for(; itSetBegin != itSetEnd; itSetBegin ++ )
	{
		if((*itSetBegin) == pPreHeader)
		{
			pNew->addIncoming(pLoad, pPreHeader);
		}
		else
		{
			pNew->addIncoming(pAdd, *itSetBegin);
		}
	}


	itSetBegin = setExitBlock.begin();
	itSetEnd   = setExitBlock.end();

	for(; itSetBegin != itSetEnd; itSetBegin ++ )
	{
		SmallVector<BasicBlock*, 8> LoopBlocks;

		for(pred_iterator PI = pred_begin(*itSetBegin), E = pred_end(*itSetBegin); PI != E; ++PI)
		{
			if(setCloned.find(*PI) != setCloned.end())
			{
				LoopBlocks.push_back(*PI);
			}
		}

		BasicBlock * NewExitBB = SplitBlockPredecessors(*itSetBegin, LoopBlocks, ".WL.loopexit", this);

		pStore = new StoreInst(pAdd, this->numIterations, false, NewExitBB->getFirstInsertionPt());
		pStore->setAlignment(8);
	}

	pPreHeader->getParent()->dump();

}
Beispiel #18
0
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop.  We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
void LICM::PromoteAliasSet(AliasSet &AS) {
  // We can promote this alias set if it has a store, if it is a "Must" alias
  // set, if the pointer is loop invariant, and if we are not eliminating any
  // volatile loads or stores.
  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
    return;

  assert(!AS.empty() &&
         "Must alias set should have at least one pointer element in it!");
  Value *SomePtr = AS.begin()->getValue();

  // It isn't safe to promote a load/store from the loop if the load/store is
  // conditional.  For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  //
  // It is safe to promote P if all uses are direct load/stores and if at
  // least one is guaranteed to be executed.
  bool GuaranteedToExecute = false;

  SmallVector<Instruction*, 64> LoopUses;
  SmallPtrSet<Value*, 4> PointerMustAliases;

  // We start with an alignment of one and try to find instructions that allow
  // us to prove better alignment.
  unsigned Alignment = 1;

  // Check that all of the pointers in the alias set have the same type.  We
  // cannot (yet) promote a memory location that is loaded and stored in
  // different sizes.
  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
    Value *ASIV = ASI->getValue();
    PointerMustAliases.insert(ASIV);

    // Check that all of the pointers in the alias set have the same type.  We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return;

    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
         UI != UE; ++UI) {
      // Ignore instructions that are outside the loop.
      Instruction *Use = dyn_cast<Instruction>(*UI);
      if (!Use || !CurLoop->contains(Use))
        continue;

      // If there is an non-load/store instruction in the loop, we can't promote
      // it.
      if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
        assert(!load->isVolatile() && "AST broken");
        if (!load->isSimple())
          return;
      } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
        // Stores *of* the pointer are not interesting, only stores *to* the
        // pointer.
        if (Use->getOperand(1) != ASIV)
          continue;
        assert(!store->isVolatile() && "AST broken");
        if (!store->isSimple())
          return;

        // Note that we only check GuaranteedToExecute inside the store case
        // so that we do not introduce stores where they did not exist before
        // (which would break the LLVM concurrency model).

        // If the alignment of this instruction allows us to specify a more
        // restrictive (and performant) alignment and if we are sure this
        // instruction will be executed, update the alignment.
        // Larger is better, with the exception of 0 being the best alignment.
        unsigned InstAlignment = store->getAlignment();
        if ((InstAlignment > Alignment || InstAlignment == 0)
            && (Alignment != 0))
          if (isGuaranteedToExecute(*Use)) {
            GuaranteedToExecute = true;
            Alignment = InstAlignment;
          }

        if (!GuaranteedToExecute)
          GuaranteedToExecute = isGuaranteedToExecute(*Use);

      } else
        return; // Not a load or store.

      LoopUses.push_back(Use);
    }
  }

  // If there isn't a guaranteed-to-execute instruction, we can't promote.
  if (!GuaranteedToExecute)
    return;

  // Otherwise, this is safe to promote, lets do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
  Changed = true;
  ++NumPromoted;

  // Grab a debug location for the inserted loads/stores; given that the
  // inserted loads/stores have little relation to the original loads/stores,
  // this code just arbitrarily picks a location from one, since any debug
  // location is better than none.
  DebugLoc DL = LoopUses[0]->getDebugLoc();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode*, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                        *CurAST, DL, Alignment);

  // Set up the preheader to have a definition of the value.  It is the live-out
  // value from the preheader that uses in the loop will use.
  LoadInst *PreheaderLoad =
    new LoadInst(SomePtr, SomePtr->getName()+".promoted",
                 Preheader->getTerminator());
  PreheaderLoad->setAlignment(Alignment);
  PreheaderLoad->setDebugLoc(DL);
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Rewrite all the loads in the loop and remember all the definitions from
  // stores in the loop.
  Promoter.run(LoopUses);

  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
  if (PreheaderLoad->use_empty())
    PreheaderLoad->eraseFromParent();
}
Beispiel #19
0
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  assert(AI);

  AtomicOrdering MemOpOrder =
      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  // Atomics require at least natural alignment.
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
                Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());

  AI->replaceAllUsesWith(NewLoaded);
  AI->eraseFromParent();

  return true;
}
Beispiel #20
0
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= !!dyn_cast<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // Must have a single use.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (I && (I->getNumUses() > 1 || I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Is this a simple vector constant.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList LHSVL, RHSVL;
    for (int i = 0; i < VF; ++i) {
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
    }

    Value *RHS = vectorizeTree(RHSVL, VF);
    Value *LHS = vectorizeTree(LHSVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
    return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
  }
  case Instruction::Load: {
    LoadInst *LI = dyn_cast<LoadInst>(VL0);
    unsigned Alignment = LI->getAlignment();

    // Check if all of the loads are consecutive.
    for (unsigned i = 1, e = VF; i < e; ++i)
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
        return Scalarize(VL, VecTy);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                          VecTy->getPointerTo());
    LI = Builder.CreateLoad(VecPtr);
    LI->setAlignment(Alignment);
    return LI;
  }
  case Instruction::Store: {
    StoreInst *SI = dyn_cast<StoreInst>(VL0);
    unsigned Alignment = SI->getAlignment();

    ValueList ValueOp;
    for (int i = 0; i < VF; ++i)
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

    Value *VecValue = vectorizeTree(ValueOp, VF);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                          VecTy->getPointerTo());
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

    for (int i = 0; i < VF; ++i)
      cast<Instruction>(VL[i])->eraseFromParent();
    return 0;
  }
  default:
    return Scalarize(VL, VecTy);
  }
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
                                                    bool UseCalls) {
  // Don't insert coverage for unreachable blocks: we will never call
  // __sanitizer_cov() for them, so counting them in
  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
  // percentage. Also, unreachable instructions frequently have no debug
  // locations.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return;
  BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
  // Skip static allocas at the top of the entry block so they don't become
  // dynamic when we split the block.  If we used our optimized stack layout,
  // then there will only be one alloca and it will come first.
  for (; IP != BE; ++IP) {
    AllocaInst *AI = dyn_cast<AllocaInst>(IP);
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  bool IsEntryBB = &BB == &F.getEntryBlock();
  DebugLoc EntryLoc;
  if (IsEntryBB) {
    if (auto SP = getDISubprogram(&F))
      EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
  } else {
    EntryLoc = IP->getDebugLoc();
  }

  IRBuilder<> IRB(IP);
  IRB.SetCurrentDebugLocation(EntryLoc);
  SmallVector<Value *, 1> Indices;
  Value *GuardP = IRB.CreateAdd(
      IRB.CreatePointerCast(GuardArray, IntptrTy),
      ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
  Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
  GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
  if (UseCalls) {
    IRB.CreateCall(SanCovWithCheckFunction, GuardP);
  } else {
    LoadInst *Load = IRB.CreateLoad(GuardP);
    Load->setAtomic(Monotonic);
    Load->setAlignment(4);
    SetNoSanitizeMetadata(Load);
    Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
    Instruction *Ins = SplitBlockAndInsertIfThen(
        Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
    IRB.SetInsertPoint(Ins);
    IRB.SetCurrentDebugLocation(EntryLoc);
    // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
    IRB.CreateCall(SanCovFunction, GuardP);
    IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
  }

  if (Options.Use8bitCounters) {
    IRB.SetInsertPoint(IP);
    Value *P = IRB.CreateAdd(
        IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
        ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
    P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
    LoadInst *LI = IRB.CreateLoad(P);
    Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
    StoreInst *SI = IRB.CreateStore(Inc, P);
    SetNoSanitizeMetadata(LI);
    SetNoSanitizeMetadata(SI);
  }

  if (Options.TraceBB) {
    // Experimental support for tracing.
    // Insert a callback with the same guard variable as used for coverage.
    IRB.SetInsertPoint(IP);
    IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
  }
}
Beispiel #22
0
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
                                                    bool UseCalls) {
  // Don't insert coverage for unreachable blocks: we will never call
  // __sanitizer_cov() for them, so counting them in
  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
  // percentage. Also, unreachable instructions frequently have no debug
  // locations.
  if (isa<UnreachableInst>(BB.getTerminator()))
    return;
  BasicBlock::iterator IP = BB.getFirstInsertionPt();

  bool IsEntryBB = &BB == &F.getEntryBlock();
  DebugLoc EntryLoc;
  if (IsEntryBB) {
    if (auto SP = getDISubprogram(&F))
      EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
    // Keep static allocas and llvm.localescape calls in the entry block.  Even
    // if we aren't splitting the block, it's nice for allocas to be before
    // calls.
    IP = PrepareToSplitEntryBlock(BB, IP);
  } else {
    EntryLoc = IP->getDebugLoc();
  }

  IRBuilder<> IRB(&*IP);
  IRB.SetCurrentDebugLocation(EntryLoc);
  Value *GuardP = IRB.CreateAdd(
      IRB.CreatePointerCast(GuardArray, IntptrTy),
      ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
  Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
  GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
  if (Options.TracePC) {
    IRB.CreateCall(SanCovTracePC);
  } else if (Options.TraceBB) {
    IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
  } else if (UseCalls) {
    IRB.CreateCall(SanCovWithCheckFunction, GuardP);
  } else {
    LoadInst *Load = IRB.CreateLoad(GuardP);
    Load->setAtomic(Monotonic);
    Load->setAlignment(4);
    SetNoSanitizeMetadata(Load);
    Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
    Instruction *Ins = SplitBlockAndInsertIfThen(
        Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
    IRB.SetInsertPoint(Ins);
    IRB.SetCurrentDebugLocation(EntryLoc);
    // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
    IRB.CreateCall(SanCovFunction, GuardP);
    IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
  }

  if (Options.Use8bitCounters) {
    IRB.SetInsertPoint(&*IP);
    Value *P = IRB.CreateAdd(
        IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
        ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
    P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
    LoadInst *LI = IRB.CreateLoad(P);
    Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
    StoreInst *SI = IRB.CreateStore(Inc, P);
    SetNoSanitizeMetadata(LI);
    SetNoSanitizeMetadata(SI);
  }
}