Code Example #1
/// Infer nonnull attributes for the arguments at the specified callsite.
static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
  SmallVector<unsigned, 4> Indices;
  unsigned ArgNo = 0;

  for (Value *V : CS.args()) {
    PointerType *Type = dyn_cast<PointerType>(V->getType());
    // Try to mark pointer typed parameters as non-null.  We skip the
    // relatively expensive analysis for constants which are obviously either
    // null or non-null to start with.
    if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
        !isa<Constant>(V) && 
        LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                            ConstantPointerNull::get(Type),
                            CS.getInstruction()) == LazyValueInfo::False)
      Indices.push_back(ArgNo + 1);
    ArgNo++;
  }

  assert(ArgNo == CS.arg_size() && "sanity check");

  if (Indices.empty())
    return false;

  AttributeSet AS = CS.getAttributes();
  LLVMContext &Ctx = CS.getInstruction()->getContext();
  AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
  CS.setAttributes(AS);

  return true;
}
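
A minimal driver sketch for the routine above (an illustrative assumption, not part of the original file): a pass can walk every instruction and hand the calls and invokes to processCallSite, since only those two instruction kinds can form a CallSite in the pre-CallBase API.

static bool runOnFunctionSketch(Function &F, LazyValueInfo *LVI) {
  bool Changed = false;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      // Only call and invoke instructions can be wrapped in a CallSite.
      if (isa<CallInst>(&I) || isa<InvokeInst>(&I))
        Changed |= processCallSite(CallSite(&I), LVI);
  return Changed;
}
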
Code Example #2
//
// Method: visitRuntimeCheck()
//
// Description:
//  Visit a call to a run-time check (or related function) and insert pool
//  arguments where needed. PoolArgc is the number of initial pool arguments
//  that should be filled at the call site with pool handles for the
//  corresponding pointer arguments.
//
void
FuncTransform::visitRuntimeCheck (CallSite CS, const unsigned PoolArgc) {
  // A call to the runtime check should have positions for each pool argument
  // and the corresponding pointer.
  assert ((CS.arg_size() >= 2 * PoolArgc) &&
    "Not enough arguments to call of a runtime check!");

  for (unsigned PoolIndex = 0; PoolIndex < PoolArgc; ++PoolIndex) {
    //
    // Get the pool handle for the pointer argument.
    //
    Value *PH =
      getPoolHandle(CS.getArgument(PoolArgc + PoolIndex)->stripPointerCasts());

    //
    // Insert the pool handle into the run-time check.
    //
    if (PH) {
      Type * Int8Type  = Type::getInt8Ty(CS.getInstruction()->getContext());
      Type * VoidPtrTy = PointerType::getUnqual(Int8Type);
      PH = castTo (PH, VoidPtrTy, PH->getName(), CS.getInstruction());
      CS.setArgument (PoolIndex, PH);

      //
      // Record that we've used the pool here.
      //
      AddPoolUse (*(CS.getInstruction()), PH, PoolUses);
    }
  }
}
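
The castTo helper used above belongs to the surrounding pool-allocation code and is not shown here. Below is a minimal sketch of the idiom such a helper typically wraps (an assumption; the real SAFECode helper may differ):

static Value *castTo(Value *V, Type *Ty, const Twine &Name,
                     Instruction *InsertPt) {
  // Nothing to do if the value already has the requested type.
  if (V->getType() == Ty)
    return V;
  // Fold casts of constants into constant expressions.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getPointerCast(C, Ty);
  // Otherwise insert an explicit pointer cast before the insertion point.
  return CastInst::CreatePointerCast(V, Ty, Name, InsertPt);
}
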
Code Example #3
File: InlineCost.cpp Project: headmyshoulder/llvm
/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}
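
A concrete illustration of that flow (the callee and values below are examples, not taken from the original file):

// For a call such as
//   %r = call double @llvm.sqrt.f64(double 4.000000e+00)
// canConstantFoldCallTo(F) returns true, ConstantArgs ends up holding the
// single constant 4.0, and ConstantFoldCall folds the call to 2.0.  The result
// is recorded in SimplifiedValues keyed by the call instruction, so the rest
// of the analysis treats %r as a free constant.
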
Code Example #4
void MemoryInstrumenter::instrumentCallSite(CallSite CS) {
  IDAssigner &IDA = getAnalysis<IDAssigner>();
  unsigned InsID = IDA.getInstructionID(CS.getInstruction());
  assert(InsID != IDAssigner::InvalidID);

  int NumCallingArgs = CS.arg_size();
  vector<Value *> Args;
  Args.push_back(ConstantInt::get(IntType, InsID));
  Args.push_back(ConstantInt::get(IntType, NumCallingArgs));
  CallInst::Create(CallHook, Args, "", CS.getInstruction());
}
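
IntType and CallHook are members initialized elsewhere in the pass. A hypothetical sketch of that setup (the hook name, the void(int, int) prototype, and the pre-LLVM-9 getOrInsertFunction return type are all assumptions):

static Constant *declareCallHook(Module &M, Type *IntType) {
  // void HookBeforeCall(IntType InsID, IntType NumCallingArgs)
  FunctionType *HookTy = FunctionType::get(
      Type::getVoidTy(M.getContext()), {IntType, IntType}, /*isVarArg=*/false);
  return M.getOrInsertFunction("HookBeforeCall", HookTy);
}
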
Code Example #5
// Add calls to the registration functions around this call site.
void RegisterVarargCallSites::registerCallSite(Module &M, CallSite &CS) {
  // Insert the registration intrinsics.
  if (registrationFunc == 0 || unregistrationFunc == 0)
    makeRegistrationFunctions(M);
  Instruction *inst = CS.getInstruction();
  LLVMContext &C = M.getContext();
  Type *VoidPtrTy = Type::getInt8PtrTy(C);
  Type *Int32Ty   = Type::getInt32Ty(C);
  // Build the argument vector to vacallregister.
  vector<Value *> vaCallRegisterArgs(2);
  Value *dest = CS.getCalledValue();
  Value *destPtr;
  // Get the function pointer casted to i8*.
  if (isa<Constant>(dest))
    destPtr = ConstantExpr::getPointerCast(cast<Constant>(dest), VoidPtrTy);
  else 
    destPtr = new BitCastInst(dest, VoidPtrTy, "", inst);
  vaCallRegisterArgs[0] = destPtr;
  vaCallRegisterArgs[1] = ConstantInt::get(Int32Ty, CS.arg_size());
  // Register all the pointer arguments to this function call as well.
  set<Value *> pointerArguments;
  CallSite::arg_iterator arg = CS.arg_begin();
  CallSite::arg_iterator end = CS.arg_end();
  for (; arg != end; ++arg) {
    Value *argval = *arg;
    if (isa<PointerType>(argval->getType())) {
      if (pointerArguments.find(argval) == pointerArguments.end()) {
        pointerArguments.insert(argval);
        vaCallRegisterArgs.push_back(argval);
      }
    }
  }
  // End the argument list with a NULL parameter.
  vaCallRegisterArgs.push_back(
    ConstantPointerNull::get(cast<PointerType>(VoidPtrTy))
  );
  // Add the registration call before the call site.
  CallInst::Create(registrationFunc, vaCallRegisterArgs, "", inst);
  // Add the unregistration call after the call site.
  Instruction *unreg = CallInst::Create(unregistrationFunc);
  unreg->insertAfter(inst);
  return;
}
Code Example #6
/// processCallSite - Infer nonnull attributes for the arguments at the
/// specified callsite.
bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
  bool Changed = false;

  unsigned ArgNo = 0;
  for (Value *V : CS.args()) {
    PointerType *Type = dyn_cast<PointerType>(V->getType());

    if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
        LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                            ConstantPointerNull::get(Type),
                            CS.getInstruction()) == LazyValueInfo::False) {
      AttributeSet AS = CS.getAttributes();
      AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo + 1,
                           Attribute::NonNull);
      CS.setAttributes(AS);
      Changed = true;
    }
    ArgNo++;
  }
  assert(ArgNo == CS.arg_size() && "sanity check");

  return Changed;
}
Code Example #7
File: InlineCost.cpp Project: headmyshoulder/llvm
bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() &&
      cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (!callIsSmall(CS)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
  }

  return Base::visitCallSite(CS);
}
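
A worked example of the direct-call accounting above (the numeric values come from this era's InlineConstants and should be treated as assumptions if your tree differs):

// A direct, non-inline-asm call with three arguments that callIsSmall()
// rejects adds
//   Cost += 3 * InlineConstants::InstrCost;  // argument setup: 3 * 5 = 15
//   Cost += InlineConstants::CallPenalty;    // call overhead:          25
// for a total of 40, before Base::visitCallSite accounts for the call
// instruction itself.
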
Code Example #8
// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline this
// call.  The program is still in a well defined state if this occurs though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;


  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  //
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  std::vector<ReturnInst*> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy ValueMap after cloning.
    DenseMap<const Value*, Value*> ValueMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
          !CalledFunc->onlyReadsMemory()) {
        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);

        // Create the alloca.  If we have TargetData, use nice alignment.
        unsigned Align = 1;
        if (TD) Align = TD->getPrefTypeAlignment(AggTy);
        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
                                          Caller->begin()->begin());
        // Emit a memcpy.
        const Type *Tys[] = { Type::Int64Ty };
        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                       Intrinsic::memcpy, 
                                                       Tys, 1);
        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);

        Value *Size;
        if (TD == 0)
          Size = ConstantExpr::getSizeOf(AggTy);
        else
          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));

        // Always generate a memcpy of alignment 1 here because we don't know
        // the alignment of the src pointer.  Other optimizations can infer
        // better alignment.
        Value *CallArgs[] = {
          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
        };
        CallInst *TheMemCpy =
          CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);

        // If we have a call graph, update it.
        if (CG) {
          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
          CallGraphNode *CallerNode = (*CG)[Caller];
          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
        }

        // Uses of the argument in the function should use our new alloca
        // instead.
        ActualArg = NewAlloca;
      }

      ValueMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
                              &InlinedFunctionInfo, TD);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  //
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
           E = FirstNewBlock->end(); I != E; )
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
        // If the alloca is now dead, remove it.  This often occurs due to code
        // specialization.
        if (AI->use_empty()) {
          AI->eraseFromParent();
          continue;
        }

        if (isa<Constant>(AI->getArraySize())) {
          // Scan for the block of allocas that we can move over, and move them
          // all at once.
          while (isa<AllocaInst>(I) &&
                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
            ++I;

          // Transfer all of the allocas over in a block.  Using splice means
          // that the instructions aren't removed from the symbol table, then
          // reinserted.
          Caller->getEntryBlock().getInstList().splice(
              InsertPoint,
              FirstNewBlock->getInstList(),
              AI, I);
        }
      }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Constant *StackSave, *StackRestore;
    StackSave    = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);

    // If we are preserving the callgraph, add edges to the stacksave/restore
    // functions for the calls we insert.
    CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
    if (CG) {
      // We know that StackSave/StackRestore are Function*'s, because they are
      // intrinsics which must have the right types.
      StackSaveCGN    = CG->getOrInsertFunction(cast<Function>(StackSave));
      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
      CallerNode = (*CG)[Caller];
    }

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
                                          FirstNewBlock->begin());
    if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
      if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();

    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          CallInst::Create(StackRestore, SavedPtr, "", UI);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
  // instructions are unreachable.
  if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      TerminatorInst *Term = BB->getTerminator();
      if (isa<UnwindInst>(Term)) {
        new UnreachableInst(Term);
        BB->getInstList().erase(Term);
      }
    }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  const Type *RTy = CalledFunc->getReturnType();

  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    PHINode *PHI = 0;
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty())
      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  return true;
}
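
A hypothetical caller-side sketch for this overload (simplified; the real inliner pass does far more bookkeeping, and the helper name here is invented):

static bool tryInlineCallSketch(Instruction *I, CallGraph *CG,
                                const TargetData *TD) {
  // Only call and invoke instructions can be wrapped in a CallSite.
  if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
    return false;
  return InlineFunction(CallSite(I), CG, TD);
}
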
Code Example #9
File: CallSiteSplitting.cpp Project: mkurdej/llvm
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
  if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
    return false;
  return tryToSplitOnPredicatedArgument(CS) ||
         tryToSplitOnPHIPredicatedArgument(CS);
}
Code Example #10
bool CSDataRando::processCallSite(CallSite CS, FuncInfo &FI, PointerEquivalenceAnalysis &P, DSGraph *G) {
  bool IndirectCall = !isa<Function>(CS.getCalledValue()->stripPointerCasts());
  if (IndirectCall) { NumIndirectCalls++; }

  CallSite OriginalCS = originalCallSite(FI, CS);
  if (!DSA->canEncryptCall(OriginalCS)) {
    if (IndirectCall) { NumIndirectCantEncrypt++; }
    return false;
  }

  DSCallSite DSCS = G->getDSCallSiteForCallSite(OriginalCS);
  const Function *Callee = getEffectiveCallee(DSCS, FI, G);
  if (!Callee) {
    if (IndirectCall) { NumIndirectCantEncrypt++; }
    return false;
  }

  FuncInfo &CalleeInfo = FunctionInfo[Callee];
  Value *Clone = getCloneCalledValue(CS, CalleeInfo);
  if (!Clone || CalleeInfo.ArgNodes.empty()) {
    if (IndirectCall) { NumIndirectCantEncrypt++; }
    return false;
  }

  // We create a mapping between the formal argument nodes in the callee
  // function and the actual argument nodes in the caller function's graph.
  DSGraph::NodeMapTy NodeMap;
  DSGraph *CalleeG = DSA->getDSGraph(*Callee);

  // getArgNodesForCall places the return node and the vanode in the
  // first two slots of the vector, followed by the nodes for the regular
  // pointer arguments.
  std::vector<DSNodeHandle> ArgNodes;
  getArgNodesForCall(CalleeG, DSCS, ArgNodes);

  // First the return value
  DSNodeHandle CalleeRetNode = ArgNodes[0];
  DSGraph::computeNodeMapping(CalleeRetNode, DSCS.getRetVal(), NodeMap);

  // Then VarArgs
  DSNodeHandle CalleeVarArgNode = ArgNodes[1];
  DSGraph::computeNodeMapping(CalleeVarArgNode, DSCS.getVAVal(), NodeMap);

  // And last the regular arguments.
  for (unsigned int i = 0; i < DSCS.getNumPtrArgs() && i + 2 < ArgNodes.size(); i++) {
    DSGraph::computeNodeMapping(ArgNodes[i + 2], DSCS.getPtrArg(i), NodeMap);
  }

  // Collect the arguments and masks to pass to the call
  SmallVector<Value*, 8> Args;
  unsigned int i = 0;
  for (unsigned int e = CS.getFunctionType()->getNumParams(); i < e; i++) {
    Args.push_back(CS.getArgOperand(i));
  }

  for (const DSNode *N : CalleeInfo.ArgNodes) {
    Value *Mask = P.getMaskForNode(NodeMap[N]);
    Args.push_back(Mask);
  }

  // VarArgs go after masks
  for (unsigned int e = CS.arg_size(); i < e; i++) {
    Args.push_back(CS.getArgOperand(i));
  }

  // Do replacement
  Instruction *CI = CS.getInstruction();
  Value *Call;
  if (CS.isCall()) {
    Call = CallInst::Create(Clone, Args, "", CI);
  } else {
    InvokeInst *II = cast<InvokeInst>(CI);
    Call = InvokeInst::Create(Clone, II->getNormalDest(), II->getUnwindDest(), Args, "", II);
  }
  CallSite NewCS(Call);
  NewCS.setCallingConv(CS.getCallingConv());

  CI->replaceAllUsesWith(Call);
  P.replace(CI, Call);
  CI->eraseFromParent();

  return true;
}
Code Example #11
void MetadataTransform(CallSite &CS, const TargetData *TD)
{
    Instruction *I = CS.getInstruction();
    Module *M = I->getParent()->getParent()->getParent();
    LLVMContext &Ctx = I->getContext();
    unsigned argIdx = 0;

    if (CS.arg_size() < 2)
        report_fatal_error(I, "_SYS_lti_metadata requires at least two args");

    // Parse the 'key' parameter
    ConstantInt *CI = dyn_cast<ConstantInt>(CS.getArgument(argIdx++));
    if (!CI)
        report_fatal_error(I, "Metadata key must be a constant integer.");
    uint16_t key = CI->getZExtValue();
    if (key != CI->getZExtValue())
        report_fatal_error(I, "Metadata key argument is too large.");

    // Parse the 'fmt' parameter
    std::string fmt;
    if (!GetConstantStringInfo(CS.getArgument(argIdx++), fmt))
        report_fatal_error(I, "Metadata format must be a constant string.");
    if (fmt.size() != CS.arg_size() - 2)
        report_fatal_error(I, "Length of metadata format must match number of parameters");
    if (fmt.size() == 0)
        report_fatal_error(I, "Empty metadata values are not supported");

    /*
     * Parse every other parameter according to the format string
     */

    SmallVector<Constant*, 8> Members;
    unsigned align = 1;

    for (std::string::iterator FI = fmt.begin(), FE = fmt.end();
        FI != FE; ++FI, argIdx++) {
        Constant *Arg = dyn_cast<Constant>(CS.getArgument(argIdx));
        Constant *C;
        if (!Arg)
            report_fatal_error(I, "Metadata argument " + Twine(argIdx+1) + " is not constant");

        /*
         * First, non-integer types
         */

        switch (*FI) {
            case 's': {
                std::string str;
                if (!GetConstantStringInfo(Arg, str))
                    report_fatal_error(I, "Metadata formatter 's' requires a constant string");
                Members.push_back(ConstantArray::get(Ctx, str, false));
                continue;
            }
        }
        
        /*
         * Integer types
         */

        if (Arg->getType()->isPointerTy())
            Arg = ConstantExpr::getPointerCast(Arg, Type::getInt32Ty(Ctx));

        if (!Arg->getType()->isIntegerTy())
            report_fatal_error(I, "Metadata argument " + Twine(argIdx+1) +
                " can't be converted to an integer type.");

        switch (*FI) {

            case 'b':
                C = ConstantExpr::getIntegerCast(Arg, Type::getInt8Ty(Ctx), true);
                break;

            case 'B':
                C = ConstantExpr::getIntegerCast(Arg, Type::getInt8Ty(Ctx), false);
                break;

            case 'h':
                C = ConstantExpr::getIntegerCast(Arg, Type::getInt16Ty(Ctx), true);
                break;

            case 'H':
                C = ConstantExpr::getIntegerCast(Arg, Type::getInt16Ty(Ctx), false);
                break;

            case 'i':
                C = ConstantExpr::getIntegerCast(Arg, Type::getInt32Ty(Ctx), true);
                break;

            case 'I':
                C = ConstantExpr::getIntegerCast(Arg, Type::getInt32Ty(Ctx), false);
                break;

            default:
                report_fatal_error(I, "Unsupported format character '" + Twine(*FI) + "' in metadata");
        }

        align = std::max(align, TD->getABITypeAlignment(C->getType()));
        Members.push_back(C);
    }

    Constant *Struct = ConstantStruct::getAnon(Members);

    /*
     * Install this metadata item as a global variable in a special section
     */

    GlobalVariable *GV = new GlobalVariable(*M, Struct->getType(),
        true, GlobalValue::ExternalLinkage, Struct, "", 0, false);

    GV->setAlignment(align);
    GV->setName(SVMDecorations::META + Twine(key) + SVMDecorations::SEPARATOR);
    GV->setSection(".metadata");

    // Remove the original _SYS_lti_metadata() call
    I->eraseFromParent();
}
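
For context, the call shape this transform consumes looks roughly like the following (the key, format string, and argument names are hypothetical, inferred from the parsing logic above):

// _SYS_lti_metadata(0x1234, "iBs", someWordValue, someByteFlag, "a string");
//
// The first argument is the metadata key, the second the format string, and
// each remaining argument is encoded per its format character ('i' -> signed
// 32-bit, 'B' -> unsigned 8-bit, 's' -> constant string) into the anonymous
// struct that is emitted into the .metadata section.
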
Code Example #12
File: Lint.cpp Project: AnachroNia/llvm
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }


  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
                           MSI->getAlignment(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    }
}
Code Example #13
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();
  
  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get the personality function from the callee if it contains a landing pad.
  Value *CalleePersonality = 0;
  for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
       I != E; ++I)
    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
      const BasicBlock *BB = II->getUnwindDest();
      const LandingPadInst *LP = BB->getLandingPadInst();
      CalleePersonality = LP->getPersonalityFn();
      break;
    }

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  if (CalleePersonality) {
    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
         I != E; ++I)
      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
        const BasicBlock *BB = II->getUnwindDest();
        const LandingPadInst *LP = BB->getLandingPadInst();

        // If the personality functions match, then we can perform the
        // inlining. Otherwise, we can't inline.
        // TODO: This isn't 100% true. Some personality functions are proper
        //       supersets of others and can be used in place of the other.
        if (LP->getPersonalityFn() != CalleePersonality)
          return false;

        break;
      }
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;
      const Argument *Arg = I;

      // When byval arguments are actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CS.isByValArgument(ArgNo)) {
        ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));
 
        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;
      
      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;
      
      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);
      
      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static alloca's, scoping them to the
  // function we just inlined.
  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      // Try to determine the size of the allocation.
      ConstantInt *AllocaSize = 0;
      if (ConstantInt *AIArraySize =
          dyn_cast<ConstantInt>(AI->getArraySize())) {
        if (IFI.TD) {
          Type *AllocaType = AI->getAllocatedType();
          uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
          uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
          assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
          // Check that array size doesn't saturate uint64_t and doesn't
          // overflow when it's multiplied by type size.
          if (AllocaArraySize != ~0ULL &&
              UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
            AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                          AllocaArraySize * AllocaTypeSize);
          }
        }
      }

      builder.CreateLifetimeStart(AI, AllocaSize);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI, AllocaSize);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
      BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = NULL;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    DebugLoc Loc;
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the callee entry block into the calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  return true;
}
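
The multi-return handling above reduces to a small reusable idiom: put a PHI at the head of the merge block, then rewrite every cloned return as a branch into that block. Below is a minimal sketch of that idiom only (not the pass itself), assuming the usual LLVM headers, `using namespace llvm`, and a non-void return type; `MergeBB` and `Rets` are hypothetical inputs supplied by the caller.

static PHINode *mergeReturns(BasicBlock *MergeBB, ArrayRef<ReturnInst *> Rets,
                             Type *RetTy) {
  // Create the merge PHI first so it sits before any spliced-in code.
  PHINode *PHI = PHINode::Create(RetTy, Rets.size(), "ret.merge",
                                 MergeBB->begin());
  for (unsigned i = 0, e = Rets.size(); i != e; ++i) {
    ReturnInst *RI = Rets[i];
    // Record the returned value, branch to the merge block, and drop the ret.
    PHI->addIncoming(RI->getReturnValue(), RI->getParent());
    BranchInst::Create(MergeBB, RI);
    RI->eraseFromParent();
  }
  return PHI;
}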
Code Example #14
// FIXME: This might have been relevant for the old JIT but the MCJIT
// has a completely different implementation, so the comment below is
// likely irrelevant and misleading.
//
// For performance purposes we construct the stub in such a way that the
// arguments pointer is passed through the static global variable gTheArgsP in
// this file. This is done so that the stub function prototype trivially matches
// the special cases that the JIT knows how to directly call. If this is not
// done, then the jit will end up generating a nullary stub just to call our
// stub, for every single function call.
Function *ExternalDispatcherImpl::createDispatcher(Function *target,
                                                   Instruction *inst,
                                                   Module *module) {
  if (!resolveSymbol(target->getName()))
    return 0;

  CallSite cs;
  if (inst->getOpcode() == Instruction::Call) {
    cs = CallSite(cast<CallInst>(inst));
  } else {
    cs = CallSite(cast<InvokeInst>(inst));
  }

  Value **args = new Value *[cs.arg_size()];

  std::vector<Type *> nullary;

  // MCJIT functions need unique names, or the wrong function can be called.
  // The module identifier is included because for the MCJIT we need
  // unique function names across all `llvm::Module`s.
  std::string fnName =
      "dispatcher_" + target->getName().str() + module->getModuleIdentifier();
  Function *dispatcher =
      Function::Create(FunctionType::get(Type::getVoidTy(ctx), nullary, false),
                       GlobalVariable::ExternalLinkage, fnName, module);

  BasicBlock *dBB = BasicBlock::Create(ctx, "entry", dispatcher);

  // Get a Value* for &gTheArgsP, as an i64**.
  Instruction *argI64sp = new IntToPtrInst(
      ConstantInt::get(Type::getInt64Ty(ctx), (uintptr_t)(void *)&gTheArgsP),
      PointerType::getUnqual(PointerType::getUnqual(Type::getInt64Ty(ctx))),
      "argsp", dBB);
  Instruction *argI64s = new LoadInst(argI64sp, "args", dBB);

  // Get the target function type.
  FunctionType *FTy = cast<FunctionType>(
      cast<PointerType>(target->getType())->getElementType());

  // Each argument will be passed by writing it into gTheArgsP[i].
  unsigned i = 0, idx = 2;
  for (CallSite::arg_iterator ai = cs.arg_begin(), ae = cs.arg_end(); ai != ae;
       ++ai, ++i) {
    // Determine the type the argument will be passed as. This accommodates
    // the corresponding code in Executor.cpp for handling calls to bitcasted
    // functions.
    Type *argTy =
        (i < FTy->getNumParams() ? FTy->getParamType(i) : (*ai)->getType());
    Instruction *argI64p = GetElementPtrInst::Create(
        KLEE_LLVM_GEP_TYPE(nullptr)
        argI64s, ConstantInt::get(Type::getInt32Ty(ctx), idx), "", dBB);

    Instruction *argp =
        new BitCastInst(argI64p, PointerType::getUnqual(argTy), "", dBB);
    args[i] = new LoadInst(argp, "", dBB);

    unsigned argSize = argTy->getPrimitiveSizeInBits();
    idx += ((!!argSize ? argSize : 64) + 63) / 64;
  }

  Constant *dispatchTarget = module->getOrInsertFunction(
      target->getName(), FTy, target->getAttributes());
  Instruction *result = CallInst::Create(
      dispatchTarget, llvm::ArrayRef<Value *>(args, args + i), "", dBB);
  if (result->getType() != Type::getVoidTy(ctx)) {
    Instruction *resp = new BitCastInst(
        argI64s, PointerType::getUnqual(result->getType()), "", dBB);
    new StoreInst(result, resp, dBB);
  }

  ReturnInst::Create(ctx, dBB);

  delete[] args;

  return dispatcher;
}
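
Stripped of the argument marshalling, the dispatcher above is just an empty nullary stub that later gets a body. A minimal sketch of that shell, with `ctx` and `module` assumed to be a live LLVMContext and Module (a hypothetical helper, not part of KLEE):

static Function *createEmptyStub(LLVMContext &ctx, Module *module,
                                 const std::string &name) {
  // void stub() { return; } -- the real dispatcher fills the body in.
  std::vector<Type *> noParams;
  FunctionType *fnTy =
      FunctionType::get(Type::getVoidTy(ctx), noParams, /*isVarArg=*/false);
  Function *stub = Function::Create(fnTy, GlobalVariable::ExternalLinkage,
                                    name, module);
  BasicBlock *entry = BasicBlock::Create(ctx, "entry", stub);
  ReturnInst::Create(ctx, entry);
  return stub;
}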
Code Example #15
File: InlineCost.cpp  Project: meteogrid/llvm
int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  // InlineCost - This value measures how good an inline candidate this call
  // site is.  A lower inline cost makes it more likely for the call to
  // be inlined.  This value may go negative.
  //
  int InlineCost = 0;

  // Compute any size reductions we can expect due to arguments being passed into
  // the function.
  //
  unsigned ArgNo = 0;
  CallSite::arg_iterator I = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++I, ++FI, ++ArgNo) {

    // If an alloca is passed in, inlining this function is likely to allow
    // significant future optimization possibilities (like scalar promotion, and
    // scalarization), so encourage the inlining of the function.
    //
    if (isa<AllocaInst>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;

    // If this is a constant being passed into the function, use the argument
    // weights calculated for the callee to determine how much will be folded
    // away with this information.
    else if (isa<Constant>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
  }

  const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights
    = CalleeFI->PointerArgPairWeights;
  for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I
         = ArgPairWeights.begin(), E = ArgPairWeights.end();
       I != E; ++I)
    if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
        CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
      InlineCost -= I->second;

  // Each argument passed in has a cost at both the caller and the callee
  // sides.  Measurements show that each argument costs about the same as an
  // instruction.
  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);

  // Now that we have considered all of the factors that make the call site more
  // likely to be inlined, look at factors that make us not want to inline it.

  // Calls usually take a long time, so they make the inlining gain smaller.
  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;

  // Look at the size of the callee. Each instruction counts as 5.
  InlineCost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;

  return InlineCost;
}
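
The per-argument bookkeeping above relies on walking the call-site operands and the callee's formal parameters in lockstep. Here is a hedged sketch of just that pairing, as a hypothetical helper that is not part of the analyzer:

static unsigned countConstantActuals(CallSite CS, Function *Callee) {
  unsigned NumConstants = 0;
  CallSite::arg_iterator AI = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++AI, ++FI)
    if (isa<Constant>(*AI))  // the actual argument is a compile-time constant
      ++NumConstants;
  return NumConstants;
}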
Code Example #16
InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
                               Function *Callee,
                               SmallPtrSet<const Function*, 16> &NeverInline) {
  Instruction *TheCall = CS.getInstruction();
  Function *Caller = TheCall->getParent()->getParent();
  bool isDirectCall = CS.getCalledFunction() == Callee;

  // Don't inline functions which can be redefined at link-time to mean
  // something else.  Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
      CS.isNoInline())
    return llvm::InlineCost::getNever();

  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
  
  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee);

  // If we should never inline this, return a huge cost.
  if (CalleeFI->NeverInline())
    return InlineCost::getNever();

  // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
  // could move this up and avoid computing the FunctionInfo for
  // things we are going to just return always inline for. This
  // requires handling setjmp somewhere else, however.
  if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
    return InlineCost::getAlways();
    
  if (CalleeFI->Metrics.usesDynamicAlloca) {
    // Get information about the caller.
    FunctionInfo &CallerFI = CachedFunctionInfo[Caller];

    // If we haven't calculated this information yet, do so now.
    if (CallerFI.Metrics.NumBlocks == 0) {
      CallerFI.analyzeFunction(Caller);
     
      // Recompute the CalleeFI pointer, since getting Caller could have
      // invalidated it.
      CalleeFI = &CachedFunctionInfo[Callee];
    }

    // Don't inline a callee with dynamic alloca into a caller without them.
    // Functions containing dynamic alloca's are inefficient in various ways;
    // don't create more inefficiency.
    if (!CallerFI.Metrics.usesDynamicAlloca)
      return InlineCost::getNever();
  }

  // InlineCost - This value measures how good an inline candidate this call
  // site is.  A lower inline cost makes it more likely for the call to
  // be inlined.  This value may go negative.
  //
  int InlineCost = 0;

  // Add to the inline quality for properties that make the call valuable to
  // inline.  This includes factors that indicate that the result of inlining
  // the function will be optimizable.  Currently this just looks at arguments
  // passed into the function.
  //
  unsigned ArgNo = 0;
  CallSite::arg_iterator I = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++I, ++FI, ++ArgNo) {

    // If an alloca is passed in, inlining this function is likely to allow
    // significant future optimization possibilities (like scalar promotion, and
    // scalarization), so encourage the inlining of the function.
    //
    if (isa<AllocaInst>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;

    // If this is a constant being passed into the function, use the argument
    // weights calculated for the callee to determine how much will be folded
    // away with this information.
    else if (isa<Constant>(I)) {
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
       
      // Compute any constant bonus due to inlining we want to give here.
      InlineCost -= CountBonusForConstant(FI);
    }
  }
  
  // Each argument passed in has a cost at both the caller and the callee
  // sides.  Measurements show that each argument costs about the same as an
  // instruction.
  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);

  // If there is only one call of the function, and it has internal linkage,
  // make it almost guaranteed to be inlined.
  //
  if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
    InlineCost += InlineConstants::LastCallToStaticBonus;

  // Now that we have considered all of the factors that make the call site more
  // likely to be inlined, look at factors that make us not want to inline it.

  // If the instruction after the call, or the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn.  As such,
  // there is little point in inlining this.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      InlineCost += InlineConstants::NoreturnPenalty;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
    InlineCost += InlineConstants::NoreturnPenalty;
  
  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (Callee->getCallingConv() == CallingConv::Cold)
    InlineCost += InlineConstants::ColdccPenalty;

  // Calls usually take a long time, so they make the inlining gain smaller.
  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;

  // Look at the size of the callee. Each instruction counts as 5.
  InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;

  return llvm::InlineCost::get(InlineCost);
}
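
The noreturn heuristic used above (and again in the newer CallAnalyzer below) can be isolated into a tiny predicate. A sketch, assuming `TheCall` is a call or invoke instruction:

static bool callLooksNoReturn(Instruction *TheCall) {
  // An invoke whose normal destination starts with 'unreachable', or a call
  // followed immediately by 'unreachable', behaves like a noreturn call.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    return isa<UnreachableInst>(II->getNormalDest()->begin());
  return isa<UnreachableInst>(++BasicBlock::iterator(TheCall));
}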
Code Example #17
File: InlineCost.cpp  Project: headmyshoulder/llvm
/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;
  Threshold += SingleBBBonus;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
  FiftyPercentVectorBonus = Threshold;
  TenPercentVectorBonus = Threshold / 2;

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (TD && CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = TD->getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this unless there is literally zero
  // cost.
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Threshold = 1;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
    Threshold = 1;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
       U != E; ++U) {
    CallSite Site(cast<Value>(*U));
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Track whether we've seen a return instruction. The first return
  // instruction is free, as at least one will usually disappear in inlining.
  bool HasReturn = false;

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, we use a small-size optimized SetVector, prioritizing
  // small iteration counts because we exit early once we cross our threshold.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                                  SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > (Threshold + VectorBonus))
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Handle the terminator cost here where we can track returns and other
    // function-wide constructs.
    TerminatorInst *TI = BB->getTerminator();

    // We never want to inline functions that contain an indirectbr.  This is
    // incorrect because all the blockaddresses (in static global initializers
    // for example) would be referring to the original function, and this
    // indirect jump would jump from the inlined copy of the function into the 
    // original function which is extremely undefined behavior.
    // FIXME: This logic isn't really right; we can safely inline functions
    // with indirectbr's as long as no other function or global references the
    // blockaddress of a block within the current function.  And as a QOI issue,
    // if someone is using a blockaddress without an indirectbr, and that
    // reference somehow ends up in another function or global, we probably
    // don't want to inline this function.
    if (isa<IndirectBrInst>(TI))
      return false;

    if (!HasReturn && isa<ReturnInst>(TI))
      HasReturn = true;
    else
      Cost += InlineConstants::InstrCost;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    if (!analyzeBlock(BB)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would increase
      // the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    // Add in the live successors by first checking whether we have terminator
    // that may be simplified based on the values simplified by this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as 
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  Threshold += VectorBonus;

  return Cost < Threshold;
}
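
The threshold-bounded walk over live blocks above uses a SetVector as both worklist and visited set, re-reading size() because insertions grow the list. Here is that traversal pattern in isolation, as a hypothetical helper that just counts reachable blocks:

static unsigned countReachableBlocks(Function &F) {
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector Worklist;
  Worklist.insert(&F.getEntryBlock());
  // Do not cache size(): the loop grows the worklist as successors are added.
  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
    TerminatorInst *TI = Worklist[Idx]->getTerminator();
    for (unsigned S = 0, E = TI->getNumSuccessors(); S != E; ++S)
      Worklist.insert(TI->getSuccessor(S));  // duplicates are silently ignored
  }
  return Worklist.size();
}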
Code Example #18
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  Value *cpyDest = cpy->getDest();
  Value *cpySrc = cpy->getSource();
  CallSite CS = CallSite::get(C);

  // We need to be able to reason about the size of the memcpy, so we require
  // that it be a constant.
  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
  if (!cpyLength)
    return false;

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD->getTypeAllocSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User *UI = srcUseList.pop_back_val();

    if (isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
      if (G->hasAllZeroIndices())
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          srcUseList.push_back(*I);
      else
        return false;
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == cpyDest->getType())
        CS.setArgument(i, cpyDest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, 
                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.removeInstruction(C);

  // Remove the memcpy
  MD.removeInstruction(cpy);
  cpy->eraseFromParent();
  ++NumMemCpyInstr;

  return true;
}
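
The argument-rewriting step near the end of performCallSlotOptzn can be read as a small standalone transformation: swap one pointer for another in a call's argument list, casting when the types differ. A hedged sketch of just that step, with `OldPtr` and `NewPtr` as hypothetical inputs:

static bool replaceCallArgument(CallSite CS, Value *OldPtr, Value *NewPtr) {
  bool Changed = false;
  Instruction *Call = CS.getInstruction();
  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) {
    if (CS.getArgument(i)->stripPointerCasts() != OldPtr)
      continue;
    Value *Repl = NewPtr;
    // Insert a pointer cast in front of the call if the types do not match.
    if (CS.getArgument(i)->getType() != NewPtr->getType())
      Repl = CastInst::CreatePointerCast(NewPtr, CS.getArgument(i)->getType(),
                                         NewPtr->getName(), Call);
    CS.setArgument(i, Repl);
    Changed = true;
  }
  return Changed;
}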
Code Example #19
CallSite SancusModuleCreator::handleSancusCall(CallSite cs)
{
    assert(cs.arg_size() >= 2 && "sancus_call needs at least 2 arguments");

    auto numAsmArgs = cs.arg_size() - 2;

    if (numAsmArgs > 3)
    {
        report_fatal_error("Currently, maximum 3 arguments are supported by "
                           "sancus_call");
    }

    auto argTys = std::vector<Type*>{voidPtrTy, wordTy};
    auto entryVal = cs.getArgument(0);
    auto indexVal = cs.getArgument(1);
    auto args = std::vector<Value*>{entryVal, indexVal};

    auto inputConstraints = std::string{",r,r"};
    std::ostringstream argsAsm;
    auto regsUsage = unsigned{0x10};
    auto clobbers = std::string{",~{r6},~{r7},~{r8},~{r15}"};
    auto callerInfo = getSancusModuleInfo(cs.getCaller());

    for (decltype(numAsmArgs) i = 0; i < numAsmArgs; i++)
    {
        auto arg = cs.getArgument(i + 2);
        auto argTy = arg->getType();

        // TODO FunctionCcInfo should be used to check the argument. This is not
        // possible currently since it takes a Function* as argument.
        if (argTy->getPrimitiveSizeInBits() != 16 && !argTy->isPointerTy())
            report_fatal_error("Illegal argument type in call to sancus_call");

        inputConstraints += ",r";

        std::ostringstream regStream;
        regStream << "r" << 15 - i;
        auto regStr = regStream.str();

        if (regStr != "r15")
            clobbers += ",~{" + regStr + "}";

        argsAsm << "mov $" << i + 3 << ", " << regStr << "\n\t";
        args.push_back(arg);
        argTys.push_back(argTy);

        // see sm_exit.s to understand how this value is used.
        regsUsage >>= 1;
    }

    auto asmStr = Twine("push #1f\n\t"
                        "push r6\n\t"
                        "push r7\n\t"
                        "push r8\n\t"
                        "mov $2, r6\n\t"
                        "mov #" + Twine(regsUsage) + ", r7\n\t"
                        "mov $1, r8\n\t" + argsAsm.str() +
                        "br #" + callerInfo.getExitName() + "\n"
                        "1:\n\t"
                        "mov r15, $0"
                        ).str();

    auto constraintsStr = "=r" + inputConstraints + clobbers;
    auto resTy = wordTy;
    auto asmFuncTy = FunctionType::get(resTy, argTys, /*isVarArg=*/false);

    // NOTE hasSideEffects has to be true because the inline assembly may be
    // optimised away otherwise
    auto inlineAsm = InlineAsm::get(asmFuncTy, asmStr, constraintsStr,
                                    /*hasSideEffects=*/true);

    auto asmCall = CallInst::Create(inlineAsm, args);
    return CallSite(asmCall);
}
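
The InlineAsm plumbing in handleSancusCall boils down to three calls: build a FunctionType for the assembly, wrap the asm string and constraints in an InlineAsm value, and create a CallInst on it. A stripped-down sketch with a placeholder "nop" and no operands; the real constraints and clobbers are target-specific, as shown above:

static CallInst *createNopAsmCall(LLVMContext &ctx) {
  std::vector<Type *> noParams;
  FunctionType *asmTy =
      FunctionType::get(Type::getVoidTy(ctx), noParams, /*isVarArg=*/false);
  // hasSideEffects keeps the otherwise dead asm from being optimised away.
  InlineAsm *nopAsm = InlineAsm::get(asmTy, "nop", "", /*hasSideEffects=*/true);
  return CallInst::Create(nopAsm);  // the caller inserts it where needed
}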
Code Example #20
File: Local.cpp  Project: brills/pfpa
void GraphBuilder::visitCallSite(CallSite CS) {
  //
  // Get the called value.  Strip off any casts which are lossless.
  //
  Value *Callee = CS.getCalledValue()->stripPointerCasts();

  // Special case handling of certain libc allocation functions here.
  if (Function *F = dyn_cast<Function>(Callee))
    if (F->isIntrinsic() && visitIntrinsic(CS, F))
      return;

  // Can't do much about inline asm (yet!)
  if (isa<InlineAsm> (Callee)) {
    ++NumAsmCall;
    DSNodeHandle RetVal;
    Instruction *I = CS.getInstruction();
    if (isa<PointerType > (I->getType()))
      RetVal = getValueDest(I);

    // Calculate the arguments vector...
    for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I)
      if (isa<PointerType > ((*I)->getType()))
        RetVal.mergeWith(getValueDest(*I));
    if (!RetVal.isNull())
      RetVal.getNode()->foldNodeCompletely();
    return;
  }

  // Set up the return value...
  DSNodeHandle RetVal;
  Instruction *I = CS.getInstruction();
  if (isa<PointerType>(I->getType()))
    RetVal = getValueDest(I);

  DSNode *CalleeNode = 0;
  if (!isa<Function>(Callee)) {
    CalleeNode = getValueDest(Callee).getNode();
    if (CalleeNode == 0) {
      DEBUG(errs() << "WARNING: Program is calling through a null pointer?\n" << *I);
      return;  // Calling a null pointer?
    }
  }

  // NOTE: This code is identical to 'DSGraph::getDSCallSiteForCallSite',
  // the reason it's duplicated is because this calls getValueDest instead
  // of getNodeForValue to get the DSNodes for the arguments.  Since we're in
  // local it's possible that we need to create a DSNode for the argument, as
  // opposed to getNodeForValue which simply retrieves the existing node.


  //Get the FunctionType for the called function
  const FunctionType *CalleeFuncType = DSCallSite::FunctionTypeOfCallSite(CS);
  int NumFixedArgs = CalleeFuncType->getNumParams();

  // Sanity check--this really, really shouldn't happen
  if (!CalleeFuncType->isVarArg())
    assert(CS.arg_size() == static_cast<unsigned>(NumFixedArgs) &&
           "Too many arguments/incorrect function signature!");

  std::vector<DSNodeHandle> Args;
  Args.reserve(CS.arg_end()-CS.arg_begin());
  DSNodeHandle VarArgNH;

  // Calculate the arguments vector...
  // Add all fixed pointer arguments, then merge the rest together
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I)
    if (isa<PointerType>((*I)->getType())) {
      DSNodeHandle ArgNode = getValueDest(*I);
      if (I - CS.arg_begin() < NumFixedArgs) {
        Args.push_back(ArgNode);
      } else {
        VarArgNH.mergeWith(ArgNode);
      }
    }

  // Add a new function call entry...
  if (CalleeNode) {
    ++NumIndirectCall;
    G.getFunctionCalls().push_back(DSCallSite(CS, RetVal, VarArgNH, CalleeNode,
                                              Args));
  } else {
    ++NumDirectCall;
    G.getFunctionCalls().push_back(DSCallSite(CS, RetVal, VarArgNH,
                                              cast<Function>(Callee),
                                              Args));
  }


}
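
Both the inline-asm and the regular paths above only care about pointer-typed actuals; everything else is invisible to DSA. That scan can be written on its own as a hypothetical helper:

static unsigned countPointerArgs(CallSite CS) {
  unsigned NumPtrArgs = 0;
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I)
    if (isa<PointerType>((*I)->getType()))  // only pointers get DSNodes
      ++NumPtrArgs;
  return NumPtrArgs;
}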
Code Example #21
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
  ++NumBlocks;
  unsigned NumInstsBeforeThisBB = NumInsts;
  for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
       II != E; ++II) {
    if (isa<PHINode>(II)) continue;           // PHI nodes don't count.

    // Special handling for calls.
    if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
      if (isa<DbgInfoIntrinsic>(II))
        continue;  // Debug intrinsics don't count as size.
      
      CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
      
      // If this function contains a call to setjmp or _setjmp, never inline
      // it.  This is a hack because we depend on the user marking their local
      // variables as volatile if they are live across a setjmp call, and they
      // probably won't do this in callers.
      if (Function *F = CS.getCalledFunction()) {
        if (F->isDeclaration() && 
            (F->getName() == "setjmp" || F->getName() == "_setjmp"))
          callsSetJmp = true;
       
        // If this call is to the function itself, then the function is recursive.
        // Inlining it into other functions is a bad idea, because this is
        // basically just a form of loop peeling, and our metrics aren't useful
        // for that case.
        if (F == BB->getParent())
          isRecursive = true;
      }

      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
        // Each argument to a call takes on average one instruction to set up.
        NumInsts += CS.arg_size();

        // We don't want inline asm to count as a call - that would prevent loop
        // unrolling. The argument setup cost is still real, though.
        if (!isa<InlineAsm>(CS.getCalledValue()))
          ++NumCalls;
      }
    }
    
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
      if (!AI->isStaticAlloca())
        this->usesDynamicAlloca = true;
    }

    if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
      ++NumVectorInsts; 
    
    if (const CastInst *CI = dyn_cast<CastInst>(II)) {
      // Noop casts, including ptr <-> int,  don't count.
      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || 
          isa<PtrToIntInst>(CI))
        continue;
      // Result of a cmp instruction is often extended (to be used by other
      // cmp instructions, logical or return instructions). These are usually
      // nop on most sane targets.
      if (isa<CmpInst>(CI->getOperand(0)))
        continue;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
      // If a GEP has all constant indices, it will probably be folded with
      // a load/store.
      if (GEPI->hasAllConstantIndices())
        continue;
    }

    ++NumInsts;
  }
  
  if (isa<ReturnInst>(BB->getTerminator()))
    ++NumRets;
  
  // We never want to inline functions that contain an indirectbr.  This is
  // incorrect because all the blockaddresses (in static global initializers
  // for example) would be referring to the original function, and this indirect
  // jump would jump from the inlined copy of the function into the original
  // function which is extremely undefined behavior.
  if (isa<IndirectBrInst>(BB->getTerminator()))
    containsIndirectBr = true;

  // Remember NumInsts for this BB.
  NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
}