void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
    ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB,
    bool VarArgDisallowed, bool ForceVoidReturnTy) {
  StatepointLoweringInfo SI(DAG);
  unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
  populateCallLoweringInfo(
      SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee,
      ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(),
      false);
  if (!VarArgDisallowed)
    SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg();

  auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);

  unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;

  auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
  SI.ID = SD.StatepointID.getValueOr(DefaultID);
  SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);

  SI.DeoptState =
      ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
  SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None);
  SI.EHPadBB = EHPadBB;

  // NB! The GC arguments are deliberately left empty.

  if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
    const Instruction *Inst = CS.getInstruction();
    ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
    setValue(Inst, ReturnVal);
  }
}
Example #2
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
    ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) {
  StatepointLoweringInfo SI(DAG);
  unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
  populateCallLoweringInfo(SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(),
                           Callee, CS.getType(), false);

  auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);

  unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;

  auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
  SI.ID = SD.StatepointID.getValueOr(DefaultID);
  SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);

  SI.DeoptState =
      ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
  SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None);
  SI.EHPadBB = EHPadBB;

  if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
    const Instruction *Inst = CS.getInstruction();
    ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
    setValue(Inst, ReturnVal);
  }
}
Example #3
ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
                                            ImmutableCallSite CS2) {
  if (!EnableTBAA)
    return AAResultBase::getModRefInfo(CS1, CS2);

  if (const MDNode *M1 =
          CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
    if (const MDNode *M2 =
            CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
      if (!Aliases(M1, M2))
        return MRI_NoModRef;

  return AAResultBase::getModRefInfo(CS1, CS2);
}
Example #4
AliasAnalysis::ModRefResult
ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
  if (!EnableScopedNoAlias)
    return AliasAnalysis::getModRefInfo(CS, Loc);

  if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
                                              LLVMContext::MD_noalias)))
    return NoModRef;

  if (!mayAliasInScopes(
          CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
          Loc.AATags.NoAlias))
    return NoModRef;

  return AliasAnalysis::getModRefInfo(CS, Loc);
}
Example #5
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
                                                const MemoryLocation &Loc) {
  if (!EnableScopedNoAlias)
    return AAResultBase::getModRefInfo(CS, Loc);

  if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
                                              LLVMContext::MD_noalias)))
    return MRI_NoModRef;

  if (!mayAliasInScopes(
          CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
          Loc.AATags.NoAlias))
    return MRI_NoModRef;

  return AAResultBase::getModRefInfo(CS, Loc);
}
AliasAnalysis::ModRefResult
ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
                                    const MemoryLocation &Loc) {
  if (!EnableARCOpts)
    return AliasAnalysis::getModRefInfo(CS, Loc);

  switch (GetBasicARCInstKind(CS.getInstruction())) {
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV:
  case ARCInstKind::Autorelease:
  case ARCInstKind::AutoreleaseRV:
  case ARCInstKind::NoopCast:
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::FusedRetainAutorelease:
  case ARCInstKind::FusedRetainAutoreleaseRV:
    // These functions don't access any memory visible to the compiler.
    // Note that this doesn't include objc_retainBlock, because it updates
    // pointers when it copies block data.
    return NoModRef;
  default:
    break;
  }

  return AliasAnalysis::getModRefInfo(CS, Loc);
}
Example #7
/// callIsSmall - If a call is likely to lower to a single target instruction,
/// or is otherwise deemed small, return true.
/// TODO: Perhaps calls like memcpy, strcpy, etc?
bool llvm::callIsSmall(ImmutableCallSite CS) {
  if (isa<IntrinsicInst>(CS.getInstruction()))
    return true;

  const Function *F = CS.getCalledFunction();
  if (!F) return false;

  if (F->hasLocalLinkage()) return false;

  if (!F->hasName()) return false;

  StringRef Name = F->getName();

  // These will all likely lower to a single selection DAG node.
  if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
      Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
      Name == "sin" || Name == "sinf" || Name == "sinl" ||
      Name == "cos" || Name == "cosf" || Name == "cosl" ||
      Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
    return true;

  // These are all likely to be optimized into something smaller.
  if (Name == "pow" || Name == "powf" || Name == "powl" ||
      Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
      Name == "floor" || Name == "floorf" || Name == "ceil" ||
      Name == "round" || Name == "ffs" || Name == "ffsl" ||
      Name == "abs" || Name == "labs" || Name == "llabs")
    return true;

  return false;
}
Example #8
AliasAnalysis::ModRefResult
ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
  if (!EnableScopedNoAlias)
    return AliasAnalysis::getModRefInfo(CS1, CS2);

  if (!mayAliasInScopes(
          CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
          CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
    return NoModRef;

  if (!mayAliasInScopes(
          CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
          CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
    return NoModRef;

  return AliasAnalysis::getModRefInfo(CS1, CS2);
}
Example #9
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
                                                ImmutableCallSite CS2) {
  if (!EnableScopedNoAlias)
    return AAResultBase::getModRefInfo(CS1, CS2);

  if (!mayAliasInScopes(
          CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
          CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
    return MRI_NoModRef;

  if (!mayAliasInScopes(
          CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
          CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
    return MRI_NoModRef;

  return AAResultBase::getModRefInfo(CS1, CS2);
}
Example #10
// Helper to find the set of values described by a TSpecifier
std::set<const Value*>
getValues(const ImmutableCallSite cs, TSpecifier TS) {
  std::set<const Value*> Values;
  switch (TS) {
    case Ret:
      assert(!cs.getInstruction()->getType()->isVoidTy());
      Values.insert(cs.getInstruction());
      break;
    case Arg0:
      assert(0 < cs.arg_size());
      Values.insert(cs.getArgument(0));
      break;
    case Arg1:
      assert(1 < cs.arg_size());
      Values.insert(cs.getArgument(1));
      break;
    case Arg2:
      assert(2 < cs.arg_size());
      Values.insert(cs.getArgument(2));
      break;
    case Arg3:
      assert(3 < cs.arg_size());
      Values.insert(cs.getArgument(3));
      break;
    case Arg4:
      assert(4 < cs.arg_size());
      Values.insert(cs.getArgument(4));
      break;
    case AllArgs:
      assert(!cs.arg_empty());
      for (unsigned i = 0; i < cs.arg_size(); ++i)
        Values.insert(cs.getArgument(i));
      break;
    case VarArgs: {
      const Value *Callee = cs.getCalledValue()->stripPointerCasts();
      FunctionType *CalleeType =
        dyn_cast<FunctionType>(
          dyn_cast<PointerType>(Callee->getType())->getElementType()
          );
      for (unsigned i = CalleeType->getNumParams(); i < cs.arg_size(); ++i)
        Values.insert(cs.getArgument(i));
      break;
    }
  }
  return Values;
}
Example #11
ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
                                            const MemoryLocation &Loc) {
  if (!EnableTBAA)
    return AAResultBase::getModRefInfo(CS, Loc);

  if (const MDNode *L = Loc.AATags.TBAA)
    if (const MDNode *M =
            CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
      if (!Aliases(L, M))
        return MRI_NoModRef;

  return AAResultBase::getModRefInfo(CS, Loc);
}
std::vector<FlowRecord>
TaintReachable::process(const ContextID ctxt, const ImmutableCallSite cs) const {
  DEBUG(errs() << "Using taint reachable signature for: " << *cs.getInstruction() << "\n");

  FlowRecord exp(false,ctxt,ctxt);
  FlowRecord imp(true,ctxt,ctxt);

  // implicit from the pc of the call site and the function pointer
  imp.addSourceValue(*cs->getParent());
  imp.addSourceValue(*cs.getCalledValue());

  // Sources and sinks of the args
  for (ImmutableCallSite::arg_iterator arg = cs.arg_begin(), end = cs.arg_end();
       arg != end; ++arg) {
      // every argument's value is a source
      exp.addSourceValue(**arg);
      // if the argument is a pointer, everything it reaches is a source
      // and everything it reaches is a sink
      if ((*arg)->getType()->isPointerTy()) {
        exp.addSourceReachablePtr(**arg);
        imp.addSourceValue(**arg);

        exp.addSinkReachablePtr(**arg);
        imp.addSinkReachablePtr(**arg);
      }
  }

  // if the function has a return value it is a sink
  if (!cs->getType()->isVoidTy()) {
    imp.addSinkValue(*cs.getInstruction());
    exp.addSinkValue(*cs.getInstruction());
  }

  std::vector<FlowRecord> flows;
  flows.push_back(imp);
  flows.push_back(exp);
  return flows;
}
AliasAnalysis::ModRefResult
TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
                                      const Location &Loc) {
  if (!EnableTBAA)
    return AliasAnalysis::getModRefInfo(CS, Loc);

  if (const MDNode *L = Loc.TBAATag)
    if (const MDNode *M =
          CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
      if (!Aliases(L, M))
        return NoModRef;

  return AliasAnalysis::getModRefInfo(CS, Loc);
}
AliasAnalysis::ModRefBehavior
TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
  if (!EnableTBAA)
    return AliasAnalysis::getModRefBehavior(CS);

  ModRefBehavior Min = UnknownModRefBehavior;

  // If this is an "immutable" type, we can assume the call doesn't write
  // to memory.
  if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
    if (TBAANode(M).TypeIsImmutable())
      Min = OnlyReadsMemory;

  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}
std::vector<FlowRecord>
ArgsToRet::process(const ContextID ctxt, const ImmutableCallSite cs) const {
  DEBUG(errs() << "Using ArgsToRet reachable signature for: " << *cs.getInstruction() << "\n");

  std::vector<FlowRecord> flows;

  if (!cs->getType()->isVoidTy()) {
    FlowRecord exp(false,ctxt,ctxt);

    // Sources and sinks of the args
    for (ImmutableCallSite::arg_iterator arg = cs.arg_begin(), end = cs.arg_end();
         arg != end; ++arg) {
      // every argument's value is a source
      exp.addSourceValue(**arg);
    }

    // if the function has a return value it is a sink
    exp.addSinkValue(*cs.getInstruction());

    flows.push_back(exp);
  }

  return flows;
}
Example #16
FunctionModRefBehavior
TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
  if (!EnableTBAA)
    return AAResultBase::getModRefBehavior(CS);

  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;

  // If this is an "immutable" type, we can assume the call doesn't write
  // to memory.
  if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
    if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
        (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
      Min = FMRB_OnlyReadsMemory;

  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
}
Example #17
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
                             const Location &Loc) {
  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");

  ModRefBehavior MRB = getModRefBehavior(CS);
  if (MRB == DoesNotAccessMemory)
    return NoModRef;

  ModRefResult Mask = ModRef;
  if (onlyReadsMemory(MRB))
    Mask = Ref;

  if (onlyAccessesArgPointees(MRB)) {
    bool doesAlias = false;
    if (doesAccessArgPointees(MRB)) {
      MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
           AI != AE; ++AI) {
        const Value *Arg = *AI;
        if (!Arg->getType()->isPointerTy())
          continue;
        Location CSLoc(Arg, UnknownSize, CSTag);
        if (!isNoAlias(CSLoc, Loc)) {
          doesAlias = true;
          break;
        }
      }
    }
    if (!doesAlias)
      return NoModRef;
  }

  // If Loc is a constant memory location, the call definitely could not
  // modify the memory location.
  if ((Mask & Mod) && pointsToConstantMemory(Loc))
    Mask = ModRefResult(Mask & ~Mod);

  // If this is the end of the chain, don't forward.
  if (!AA) return Mask;

  // Otherwise, fall back to the next AA in the chain. But we can merge
  // in any mask we've managed to compute.
  return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
}
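The chaining step above works because mod/ref results are bitmasks, so intersecting the locally computed mask with the next analysis's answer can only make the result more precise, never less. Below is a minimal, self-contained sketch of that idiom; the ToyModRef constants and mergeModRef helper are hypothetical stand-ins, not LLVM's actual enum values.

#include <cassert>

// Toy mod/ref lattice: each bit grants an effect, so fewer bits = stronger guarantee.
enum ToyModRef : unsigned { TMR_NoModRef = 0, TMR_Ref = 1, TMR_Mod = 2, TMR_ModRef = 3 };

// Merging two independent analyses: an effect survives only if both still allow it.
static ToyModRef mergeModRef(ToyModRef A, ToyModRef B) {
  return static_cast<ToyModRef>(A & B);
}

int main() {
  // One analysis proves "only reads", the other proves nothing: the merge keeps Ref.
  assert(mergeModRef(TMR_Ref, TMR_ModRef) == TMR_Ref);
  // If either side proves NoModRef, the merged result is NoModRef.
  assert(mergeModRef(TMR_NoModRef, TMR_Mod) == TMR_NoModRef);
  return 0;
}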
Example #18
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const Instruction *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      // A lifetime end intrinsic should not stop tail call optimization.
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
        if (II->getIntrinsicID() == Intrinsic::lifetime_end)
          continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(&*BBI))
        return false;
    }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
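The backward scan in the function above ("no instruction with a chain may interpose between the call and the return") can be pictured with a small self-contained model. The ToyInst type and nothingInterposes helper below are hypothetical stand-ins for illustration, not LLVM types.

#include <vector>

// Toy instruction: only the properties the scan above cares about.
struct ToyInst {
  bool IsTheCall = false;        // the candidate tail call
  bool IsDebugIntrinsic = false; // skipped by the scan
  bool TouchesMemory = false;    // reads/writes memory or has side effects
};

// Block.back() is assumed to be the return; walk backwards from the
// instruction just before it. The call is in tail position only if nothing
// that touches memory sits between it and the return.
static bool nothingInterposes(const std::vector<ToyInst> &Block) {
  for (int i = static_cast<int>(Block.size()) - 2; i >= 0; --i) {
    const ToyInst &I = Block[i];
    if (I.IsTheCall)
      return true;  // reached the call with nothing in the way
    if (I.IsDebugIntrinsic)
      continue;     // debug intrinsics never block tail calls
    if (I.TouchesMemory)
      return false; // something with a chain interposes
  }
  return false;     // the call was not found before the return
}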
Example #19
AliasAnalysis::ModRefResult
AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
                                    const MemoryLocation &Loc) {
  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);

  const char *MRString = nullptr;
  switch (R) {
  case NoModRef: NoMR++;     MRString = "NoModRef"; break;
  case Ref:      JustRef++;  MRString = "JustRef"; break;
  case Mod:      JustMod++;  MRString = "JustMod"; break;
  case ModRef:   MR++;       MRString = "ModRef"; break;
  }

  if (PrintAll || (PrintAllFailures && R == ModRef)) {
    errs() << MRString << ":  Ptr: ";
    errs() << "[" << Loc.Size << "B] ";
    Loc.Ptr->printAsOperand(errs(), true, M);
    errs() << "\t<->" << *CS.getInstruction() << '\n';
  }
  return R;
}
AliasAnalysis::ModRefResult
AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
                                    const Location &Loc) {
  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);

  const char *MRString;
  switch (R) {
  default:       llvm_unreachable("Unknown mod/ref type!");
  case NoModRef: NoMR++;     MRString = "NoModRef"; break;
  case Ref:      JustRef++;  MRString = "JustRef"; break;
  case Mod:      JustMod++;  MRString = "JustMod"; break;
  case ModRef:   MR++;       MRString = "ModRef"; break;
  }

  if (PrintAll || (PrintAllFailures && R == ModRef)) {
    errs() << MRString << ":  Ptr: ";
    errs() << "[" << Loc.Size << "B] ";
    WriteAsOperand(errs(), Loc.Ptr, true, M);
    errs() << "\t<->" << *CS.getInstruction() << '\n';
  }
  return R;
}
std::vector<FlowRecord>
OverflowChecks::process(const ContextID ctxt, const ImmutableCallSite cs) const {
  DEBUG(errs() << "Using OverflowChecks signature...\n");

  FlowRecord exp(false,ctxt,ctxt);
  FlowRecord imp(true,ctxt,ctxt);

  imp.addSourceValue(*cs->getParent());

  // Add all argument values as sources
  for (ImmutableCallSite::arg_iterator arg = cs.arg_begin(), end = cs.arg_end();
       arg != end; ++arg)
    exp.addSourceValue(**arg);
  assert(!cs->getType()->isVoidTy() && "Found 'void' overflow check?");

  // And the return value as a sink
  exp.addSinkValue(*cs.getInstruction());
  imp.addSinkValue(*cs.getInstruction());

  std::vector<FlowRecord> flows;
  flows.push_back(imp);
  flows.push_back(exp);
  return flows;
}
Example #22
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
       !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(BBI))
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  AttrBuilder CallerAttrs(ExitBB->getParent()->getAttributes(),
                          AttributeSet::ReturnIndex);
  AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
                          AttributeSet::ReturnIndex);

  // Noalias is completely benign as far as calling convention goes, it
  // shouldn't affect whether the call is a tail call.
  CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
  CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);

  bool AllowDifferingSizes = true;
  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    AllowDifferingSizes = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    AllowDifferingSizes = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  if (CallerAttrs != CalleeAttrs)
    return false;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing's actually returned; it doesn't matter what the callee put there,
  // it's a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction, the
      // rest are essentially undef. The type doesn't really matter, but we need
      // *something*.
      Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway it's more efficient to create a reversed copy.
    using std::copy;
    SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
    copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
    copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));

    // Finally, we can check whether the value produced by the tail call at this
    // index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI))
      return false;

    CallEmpty  = !nextRealType(CallSubTypes, CallPath);
  } while(nextRealType(RetSubTypes, RetPath));

  return true;
}
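The return-attribute compatibility logic above (drop noalias, require zext/sext to agree, then demand the leftover attribute sets be identical) can be sketched with plain string sets. This is a toy decision procedure with hypothetical names, not LLVM's AttrBuilder API.

#include <set>
#include <string>

// Toy stand-in for the caller/callee return-attribute check above.
static bool retAttrsCompatible(std::set<std::string> Caller,
                               std::set<std::string> Callee,
                               bool &AllowDifferingSizes) {
  // noalias never affects the calling sequence, so ignore it on both sides.
  Caller.erase("noalias");
  Callee.erase("noalias");

  AllowDifferingSizes = true;
  if (Caller.count("zeroext")) {
    // The caller expects a zero-extended return; the callee must promise it too.
    if (!Callee.count("zeroext"))
      return false;
    AllowDifferingSizes = false;
    Caller.erase("zeroext");
    Callee.erase("zeroext");
  } else if (Caller.count("signext")) {
    if (!Callee.count("signext"))
      return false;
    AllowDifferingSizes = false;
    Caller.erase("signext");
    Callee.erase("signext");
  }

  // Anything left over (e.g. inreg) is a facet we don't understand: reject.
  return Caller == Callee;
}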
Example #23
// There are two types of constraints to add for a function call:
// - ValueNode(callsite) = ReturnNode(call target)
// - ValueNode(formal arg) = ValueNode(actual arg)
void Andersen::addConstraintForCall(ImmutableCallSite cs) {
  if (const Function *f = cs.getCalledFunction()) // Direct call
  {
    if (f->isDeclaration() || f->isIntrinsic()) // External library call
    {
      // Handle libraries separately
      if (addConstraintForExternalLibrary(cs, f))
        return;
      else // Unresolved library call: ruin everything!
      {
        errs() << "Unresolved ext function: " << f->getName() << "\n";
        if (cs.getType()->isPointerTy()) {
          NodeIndex retIndex = nodeFactory.getValueNodeFor(cs.getInstruction());
          assert(retIndex != AndersNodeFactory::InvalidIndex &&
                 "Failed to find ret node!");
          constraints.emplace_back(AndersConstraint::COPY, retIndex,
                                   nodeFactory.getUniversalPtrNode());
        }
        for (ImmutableCallSite::arg_iterator itr = cs.arg_begin(),
                                             ite = cs.arg_end();
             itr != ite; ++itr) {
          Value *argVal = *itr;
          if (argVal->getType()->isPointerTy()) {
            NodeIndex argIndex = nodeFactory.getValueNodeFor(argVal);
            assert(argIndex != AndersNodeFactory::InvalidIndex &&
                   "Failed to find arg node!");
            constraints.emplace_back(AndersConstraint::COPY, argIndex,
                                     nodeFactory.getUniversalPtrNode());
          }
        }
      }
    } else // Non-external function call
    {
      if (cs.getType()->isPointerTy()) {
        NodeIndex retIndex = nodeFactory.getValueNodeFor(cs.getInstruction());
        assert(retIndex != AndersNodeFactory::InvalidIndex &&
               "Failed to find ret node!");
        // errs() << f->getName() << "\n";
        NodeIndex fRetIndex = nodeFactory.getReturnNodeFor(f);
        assert(fRetIndex != AndersNodeFactory::InvalidIndex &&
               "Failed to find function ret node!");
        constraints.emplace_back(AndersConstraint::COPY, retIndex, fRetIndex);
      }
      // The argument constraints
      addArgumentConstraintForCall(cs, f);
    }
  } else // Indirect call
  {
    // We do the simplest thing here: just assume the returned value can be
    // anything :)
    if (cs.getType()->isPointerTy()) {
      NodeIndex retIndex = nodeFactory.getValueNodeFor(cs.getInstruction());
      assert(retIndex != AndersNodeFactory::InvalidIndex &&
             "Failed to find ret node!");
      constraints.emplace_back(AndersConstraint::COPY, retIndex,
                               nodeFactory.getUniversalPtrNode());
    }

    // For argument constraints, first search through all addr-taken functions:
    // any function that can accept this many arguments is a potential
    // candidate.
    const Module *M =
        cs.getInstruction()->getParent()->getParent()->getParent();
    for (auto const &f : *M) {
      NodeIndex funPtrIndex = nodeFactory.getValueNodeFor(&f);
      if (funPtrIndex == AndersNodeFactory::InvalidIndex)
        // Not an addr-taken function
        continue;

      if (!f.getFunctionType()->isVarArg() && f.arg_size() != cs.arg_size())
        // #arg mismatch
        continue;

      if (f.isDeclaration() || f.isIntrinsic()) // External library call
      {
        if (addConstraintForExternalLibrary(cs, &f))
          continue;
        else {
          // Pollute everything
          for (ImmutableCallSite::arg_iterator itr = cs.arg_begin(),
                                               ite = cs.arg_end();
               itr != ite; ++itr) {
            Value *argVal = *itr;

            if (argVal->getType()->isPointerTy()) {
              NodeIndex argIndex = nodeFactory.getValueNodeFor(argVal);
              assert(argIndex != AndersNodeFactory::InvalidIndex &&
                     "Failed to find arg node!");
              constraints.emplace_back(AndersConstraint::COPY, argIndex,
                                       nodeFactory.getUniversalPtrNode());
            }
          }
        }
      } else
        addArgumentConstraintForCall(cs, &f);
    }
  }
}
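To make the two constraint kinds named in the comment before addConstraintForCall concrete, here is a minimal, self-contained sketch of what a direct call %r = call i8* @id(i8* %p) would add, using hypothetical node indices rather than the real AndersNodeFactory.

#include <cstdio>
#include <vector>

// Toy copy constraint "dest <- src", standing in for AndersConstraint::COPY.
struct ToyCopy { unsigned Dest, Src; };

int main() {
  // Hypothetical node numbering for: %r = call i8* @id(i8* %p)
  const unsigned RetNodeOfCall = 0; // ValueNode(callsite), i.e. %r
  const unsigned RetNodeOfId   = 1; // ReturnNode(@id)
  const unsigned FormalArgNode = 2; // ValueNode(@id's formal parameter)
  const unsigned ActualArgNode = 3; // ValueNode(%p)

  std::vector<ToyCopy> Constraints;
  // ValueNode(callsite) = ReturnNode(call target)
  Constraints.push_back({RetNodeOfCall, RetNodeOfId});
  // ValueNode(formal arg) = ValueNode(actual arg)
  Constraints.push_back({FormalArgNode, ActualArgNode});

  for (const ToyCopy &C : Constraints)
    std::printf("copy: n%u <- n%u\n", C.Dest, C.Src);
  return 0;
}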
Example #24
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
       !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(BBI))
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore noalias because it doesn't affect the call sequence.
  const Function *F = ExitBB->getParent();
  Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
  if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) !=
      AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias))
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
      CallerRetAttr.hasAttribute(Attribute::SExt))
    return false;

  // Otherwise, make sure the unmodified return value of I is the return value.
  // We handle two cases: multiple return values + scalars.
  Value *RetVal = Ret->getOperand(0);
  if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
    // Handle scalars first.
    return getNoopInput(Ret->getOperand(0), TLI) == I;
  
  // If this is an aggregate return, look through the insert/extract values and
  // see if each is transparent.
  for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
       i != e; ++i) {
    const Value *InScalar = FindInsertedValue(RetVal, i);
    if (InScalar == 0) return false;
    InScalar = getNoopInput(InScalar, TLI);
    
    // If the scalar value being inserted is an extractvalue of the right index
    // from the call, then everything is good.
    const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
    if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
        EVI->getIndices()[0] != i)
      return false;
  }
  
  return true;
}
Example #25
        virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
                const Location &Loc) {
            ModRefResult base = AliasAnalysis::getModRefInfo(CS, Loc);
            if (!CS.getCalledFunction())
                return base;

            if (VERBOSITY("opt.aa") >= 2) {
                errs() << "getModRefInfo():\n";
                CS->dump();
                Loc.Ptr->dump();
                outs() << "base: " << base << '\n';
            }

            ModRefResult mask = ModRef;

            StringRef name = CS.getCalledFunction()->getName();
            if (isAllocCall(name)) {
                return NoModRef;
            }

            EscapeAnalysis &escape = getAnalysis<EscapeAnalysis>();
            EscapeAnalysis::EscapeResult escapes = escape.escapes(Loc.Ptr, CS.getInstruction());
            if (escapes != EscapeAnalysis::Escaped) {
                StatCounter num_improved("opt_modref_noescape");
                num_improved.log();
                if (VERBOSITY("opt.aa") >= 2) {
                    errs() << "Was able to show that " << *CS.getInstruction() << " can't modify " << *Loc.Ptr << '\n';
                }
                return NoModRef;
            }

            /*if (name == "printf" || name == "my_realloc" || name == "print_space_if_necessary" || name == "write") {
                mask = Ref;
                bool found_alias = false;
                for (User::const_op_iterator op_it = CS.arg_begin(), op_end = CS.arg_end(); op_it != op_end; ++op_it) {
                    if (alias(Loc, Location(op_it->get())) != NoAlias) {
                        found_alias = true;
                        break;
                    }
                }
                if (!found_alias)
                    mask = NoModRef;
            } else if (name == "snprintf" || name == "str_decref" || name == "read" || name == "file_write") {
                mask = ModRef;
                bool found_alias = false;
                for (User::const_op_iterator op_it = CS.arg_begin(), op_end = CS.arg_end(); op_it != op_end; ++op_it) {
                    if (alias(Loc, Location(op_it->get())) != NoAlias) {
                        //errs() << '\n';
                        //errs() << *CS.getInstruction() << '\n';
                        //errs() << **op_it << '\n';
                        found_alias = true;
                        break;
                    }
                }
                if (!found_alias) {
                    mask = NoModRef;
                }
            } else if (name == "my_free" || name == "my_malloc" || name == "close" || name == "int_repr") {
                mask = NoModRef;
            }*/

            return ModRefResult(mask & base);
        }
Example #26
MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
                                              unsigned ArgIdx,
                                              const TargetLibraryInfo &TLI) {
  AAMDNodes AATags;
  CS->getAAMetadata(AATags);
  const Value *Arg = CS.getArgument(ArgIdx);

  // We may be able to produce an exact size for known intrinsics.
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
    const DataLayout &DL = II->getModule()->getDataLayout();

    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memory intrinsic");
      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
        return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
      break;

    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      assert(ArgIdx == 1 && "Invalid argument index");
      return MemoryLocation(
          Arg, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AATags);

    case Intrinsic::invariant_end:
      // The first argument to an invariant.end is a "descriptor" type (e.g. a
      // pointer to an empty struct) which is never actually dereferenced.
      if (ArgIdx == 0)
        return MemoryLocation(Arg, 0, AATags);
      assert(ArgIdx == 2 && "Invalid argument index");
      return MemoryLocation(
          Arg, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AATags);

    case Intrinsic::arm_neon_vld1:
      assert(ArgIdx == 0 && "Invalid argument index");
      // LLVM's vld1 and vst1 intrinsics currently only support a single
      // vector register.
      return MemoryLocation(Arg, DL.getTypeStoreSize(II->getType()), AATags);

    case Intrinsic::arm_neon_vst1:
      assert(ArgIdx == 0 && "Invalid argument index");
      return MemoryLocation(
          Arg, DL.getTypeStoreSize(II->getArgOperand(1)->getType()), AATags);
    }
  }

  // We can bound the aliasing properties of memset_pattern16 just as we can
  // for memcpy/memset.  This is particularly important because the
  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
  // whenever possible.
  LibFunc F;
  if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
      F == LibFunc_memset_pattern16 && TLI.has(F)) {
    assert((ArgIdx == 0 || ArgIdx == 1) &&
           "Invalid argument index for memset_pattern16");
    if (ArgIdx == 1)
      return MemoryLocation(Arg, 16, AATags);
    if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
      return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
  }
  // FIXME: Handle memset_pattern4 and memset_pattern8 also.

  return MemoryLocation(CS.getArgument(ArgIdx), UnknownSize, AATags);
}
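The precise-size cases above reduce to: for a known memory intrinsic, the number of bytes accessed through a pointer argument can sometimes be read off another, constant operand. Below is a minimal sketch of that decision using toy values; the ToyIntrinsic kinds are hypothetical, not LLVM's intrinsic IDs.

#include <cstdint>
#include <optional>

// Toy classification of a call whose pointer-argument access size may be known.
enum class ToyIntrinsic { MemCpy, LifetimeStart, Other };

// Returns the byte size accessed through argument ArgIdx, if it can be
// determined from a constant length operand (LenIfConstant), else nullopt.
static std::optional<uint64_t>
knownArgAccessSize(ToyIntrinsic K, unsigned ArgIdx,
                   std::optional<uint64_t> LenIfConstant) {
  switch (K) {
  case ToyIntrinsic::MemCpy:
    // Both the dest (arg 0) and src (arg 1) access exactly the length operand.
    if ((ArgIdx == 0 || ArgIdx == 1) && LenIfConstant)
      return *LenIfConstant;
    return std::nullopt;
  case ToyIntrinsic::LifetimeStart:
    // The pointer (arg 1) covers the constant size given as arg 0.
    if (ArgIdx == 1 && LenIfConstant)
      return *LenIfConstant;
    return std::nullopt;
  case ToyIntrinsic::Other:
    return std::nullopt;
  }
  return std::nullopt;
}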
Example #27
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");

  // If CS1 or CS2 are readnone, they don't interact.
  ModRefBehavior CS1B = getModRefBehavior(CS1);
  if (CS1B == DoesNotAccessMemory) return NoModRef;

  ModRefBehavior CS2B = getModRefBehavior(CS2);
  if (CS2B == DoesNotAccessMemory) return NoModRef;

  // If they both only read from memory, there is no dependence.
  if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
    return NoModRef;

  AliasAnalysis::ModRefResult Mask = ModRef;

  // If CS1 only reads memory, the only dependence on CS2 can be
  // from CS1 reading memory written by CS2.
  if (onlyReadsMemory(CS1B))
    Mask = ModRefResult(Mask & Ref);

  // If CS2 only access memory through arguments, accumulate the mod/ref
  // information from CS1's references to the memory referenced by
  // CS2's arguments.
  if (onlyAccessesArgPointees(CS2B)) {
    AliasAnalysis::ModRefResult R = NoModRef;
    if (doesAccessArgPointees(CS2B)) {
      MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
      for (ImmutableCallSite::arg_iterator
           I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
        const Value *Arg = *I;
        if (!Arg->getType()->isPointerTy())
          continue;
        Location CS2Loc(Arg, UnknownSize, CS2Tag);
        R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
        if (R == Mask)
          break;
      }
    }
    return R;
  }

  // If CS1 only accesses memory through arguments, check if CS2 references
  // any of the memory referenced by CS1's arguments. If not, return NoModRef.
  if (onlyAccessesArgPointees(CS1B)) {
    AliasAnalysis::ModRefResult R = NoModRef;
    if (doesAccessArgPointees(CS1B)) {
      MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
      for (ImmutableCallSite::arg_iterator
           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
        const Value *Arg = *I;
        if (!Arg->getType()->isPointerTy())
          continue;
        Location CS1Loc(Arg, UnknownSize, CS1Tag);
        if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
          R = Mask;
          break;
        }
      }
    }
    if (R == NoModRef)
      return R;
  }

  // If this is the end of the chain, don't forward.
  if (!AA) return Mask;

  // Otherwise, fall back to the next AA in the chain. But we can merge
  // in any mask we've managed to compute.
  return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
}
Example #28
SDValue
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  DebugLoc &dl                          = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  ArgListTy &Args                       = CLI.Args;
  Type *retTy                           = CLI.RetTy;
  ImmutableCallSite *CS                 = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, true));
  SDValue InFlag = Chain.getValue(1);

  assert((Outs.size() == Args.size()) &&
         "Unexpected number of arguments to function call");
  unsigned paramCount = 0;
  // Declare the .param or .reg needed to pass values
  // to the function.
  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
    EVT VT = Outs[i].VT;

    if (Outs[i].Flags.isByVal() == false) {
      // Plain scalar
      // for ABI,    declare .param .b<size> .param<n>;
      // for nonABI, declare .reg .b<size> .param<n>;
      unsigned isReg = 1;
      if (isABI)
        isReg = 0;
      unsigned sz = VT.getSizeInBits();
      if (VT.isInteger() && (sz < 32)) sz = 32;
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(isReg, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                             DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (isReg)
        opcode = NVPTXISD::MoveToParam;
      else {
        if (Outs[i].Flags.isZExt())
          opcode = NVPTXISD::StoreParamU32;
        else if (Outs[i].Flags.isSExt())
          opcode = NVPTXISD::StoreParamS32;
      }
      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy &&
           "Type of a byval parameter should be pointer");
    ComputeValueVTs(*this, PTy->getElementType(), vtparts);

    if (isABI) {
      // declare .param .align 16 .b8 .param<n>[<size>];
      unsigned sz = Outs[i].Flags.getByValSize();
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      // The ByValAlign in Outs[i].Flags is always set at this point, so we
      // don't need to worry about natural alignment or not.
      // See TargetLowering::LowerCallTo().
      SDValue DeclareParamOps[] = { Chain,
                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      unsigned curOffset = 0;
      for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
        unsigned elems = 1;
        EVT elemtype = vtparts[j];
        if (vtparts[j].isVector()) {
          elems = vtparts[j].getVectorNumElements();
          elemtype = vtparts[j].getVectorElementType();
        }
        for (unsigned k=0,ke=elems; k!=ke; ++k) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                        OutVals[i],
                                        DAG.getConstant(curOffset,
                                                        getPointerTy()));
          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                MachinePointerInfo(), false, false, false, 0);
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
                                                            MVT::i32),
                                           DAG.getConstant(curOffset, MVT::i32),
                                                            theVal, InFlag };
          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                              CopyParamOps, 5);
          InFlag = Chain.getValue(1);
          curOffset += sz/8;
        }
      }
      ++paramCount;
      continue;
    }
    // Non-ABI, struct or vector
    // Declare a bunch of .reg .b<size> .param<n>
    unsigned curOffset = 0;
    for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
      unsigned elems = 1;
      EVT elemtype = vtparts[j];
      if (vtparts[j].isVector()) {
        elems = vtparts[j].getVectorNumElements();
        elemtype = vtparts[j].getVectorElementType();
      }
      for (unsigned k=0,ke=elems; k!=ke; ++k) {
        unsigned sz = elemtype.getSizeInBits();
        if (elemtype.isInteger() && (sz < 32)) sz = 32;
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
                                                             MVT::i32),
                                                  DAG.getConstant(sz, MVT::i32),
                                                   DAG.getConstant(1, MVT::i32),
                                                             InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareParamOps, 5);
        InFlag = Chain.getValue(1);
        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
                                      DAG.getConstant(curOffset,
                                                      getPointerTy()));
        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                  MachinePointerInfo(), false, false, false, 0);
        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                   DAG.getConstant(0, MVT::i32), theVal,
                                   InFlag };
        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
                            CopyParamOps, 5);
        InFlag = Chain.getValue(1);
        ++paramCount;
      }
    }
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  unsigned retCount = 0;
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
    // individual .reg .b<size> func_retval<0..> for non ABI
    unsigned resultsz = 0;
    for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
      unsigned elems = 1;
      EVT elemtype = resvtparts[i];
      if (resvtparts[i].isVector()) {
        elems = resvtparts[i].getVectorNumElements();
        elemtype = resvtparts[i].getVectorElementType();
      }
      for (unsigned j=0,je=elems; j!=je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        if (isABI == false) {
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
        }
        else {
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
        }
        if (isABI == false) {
          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(retCount, MVT::i32),
                                      InFlag };
          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                              DeclareRetOps, 5);
          InFlag = Chain.getValue(1);
          ++retCount;
        }
        resultsz += sz;
      }
    }
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
          retTy->isPointerTy() ) {
        // Scalar needs to be at least 32bit wide
        if (resultsz < 32)
          resultsz = 32;
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                    DAG.getConstant(resultsz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
      else {
        if (Func) { // direct call
          if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        } else { // indirect call
          const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
          if (!llvm::getAlign(*CallI, 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        }
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
                                                           MVT::i32),
                                          DAG.getConstant(resultsz/8, MVT::i32),
                                         DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
    }
  }

  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of the
    // form
    // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the call
    // instruction.
    // The prototype is embedded in a string and put as the operand for an
    // INLINEASM SDNode.
    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
    const char *asmstr = nvTM->getManagedStrPool()->
        getManagedString(proto_string.c_str())->c_str();
    SDValue InlineAsmOps[] = { Chain,
                               DAG.getTargetExternalSymbol(asmstr,
                                                           getPointerTy()),
                                                           DAG.getMDNode(0),
                                   DAG.getTargetConstant(0, MVT::i32), InFlag };
    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = { Chain,
                             DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
                                 : retCount, MVT::i32),
                                   InFlag };
  Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
      PrintCallVTs, PrintCallOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps, 2);
  InFlag = Chain.getValue(1);

  for (unsigned i=0, e=paramCount; i!=e; ++i) {
    unsigned opcode;
    if (i==(e-1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32),
                             InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
                      3);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (isABI) {
      unsigned resoffset = 0;
      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
        unsigned sz = Ins[i].VT.getSizeInBits();
        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
        std::vector<EVT> LoadRetVTs;
        LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
        std::vector<SDValue> LoadRetOps;
        LoadRetOps.push_back(Chain);
        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
        LoadRetOps.push_back(InFlag);
        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                     &LoadRetOps[0], LoadRetOps.size());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        InVals.push_back(retval);
        resoffset += sz/8;
      }
    }
    else {
      SmallVector<EVT, 16> resvtparts;
      ComputeValueVTs(*this, retTy, resvtparts);

      assert(Ins.size() == resvtparts.size() &&
             "Unexpected number of return values in non-ABI case");
      unsigned paramNum = 0;
      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
        assert(EVT(Ins[i].VT) == resvtparts[i] &&
               "Unexpected EVT type in non-ABI case");
        unsigned numelems = 1;
        EVT elemtype = Ins[i].VT;
        if (Ins[i].VT.isVector()) {
          numelems = Ins[i].VT.getVectorNumElements();
          elemtype = Ins[i].VT.getVectorElementType();
        }
        std::vector<SDValue> tempRetVals;
        for (unsigned j=0; j<numelems; ++j) {
          std::vector<EVT> MoveRetVTs;
          MoveRetVTs.push_back(elemtype);
          MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
          std::vector<SDValue> MoveRetOps;
          MoveRetOps.push_back(Chain);
          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
          MoveRetOps.push_back(InFlag);
          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                       &MoveRetOps[0], MoveRetOps.size());
          Chain = retval.getValue(1);
          InFlag = retval.getValue(2);
          tempRetVals.push_back(retval);
          ++paramNum;
        }
        if (Ins[i].VT.isVector())
          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
                                       &tempRetVals[0], tempRetVals.size()));
        else
          InVals.push_back(tempRetVals[0]);
      }
    }
  }
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
                             InFlag);
  uniqueCallSite++;

  // set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX
  isTailCall = false;
  return Chain;
}
Example #29
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !I->isSafeToSpeculativelyExecute())
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !BBI->isSafeToSpeculativelyExecute())
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore noalias because it doesn't affect the call sequence.
  const Function *F = ExitBB->getParent();
  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
    return false;

  // Otherwise, make sure the unmodified return value of I is the return value.
  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
       U = dyn_cast<Instruction>(U->getOperand(0))) {
    if (!U)
      return false;
    if (!U->hasOneUse())
      return false;
    if (U == I)
      break;
    // Check for a truly no-op truncate.
    if (isa<TruncInst>(U) &&
        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
      continue;
    // Check for a truly no-op bitcast.
    if (isa<BitCastInst>(U) &&
        (U->getOperand(0)->getType() == U->getType() ||
         (U->getOperand(0)->getType()->isPointerTy() &&
          U->getType()->isPointerTy())))
      continue;
    // Otherwise it's not a true no-op.
    return false;
  }

  return true;
}