Example #1
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS,
                                const TargetMachine &TM) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const Instruction *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      // A lifetime end intrinsic should not stop tail call optimization.
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
        if (II->getIntrinsicID() == Intrinsic::lifetime_end)
          continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(&*BBI))
        return false;
    }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
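
A hedged usage sketch for the version above: call lowering typically honours a "tail call" marker in the IR only when this target-independent position check also passes. The helper name below is hypothetical and only illustrates the calling pattern, not the actual SelectionDAG lowering code.

#include "llvm/CodeGen/Analysis.h"     // declares llvm::isInTailCallPosition
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetMachine.h"

// Hypothetical helper, for illustration only: honour a requested tail call
// only when the target-independent position check above also succeeds.
static bool shouldLowerAsTailCall(llvm::ImmutableCallSite CS,
                                  const llvm::TargetMachine &TM) {
  const auto *CI = llvm::dyn_cast<llvm::CallInst>(CS.getInstruction());
  if (!CI || !CI->isTailCall())   // the IR did not request a tail call
    return false;
  return llvm::isInTailCallPosition(CS, TM);
}
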
Example #2
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attribute CalleeRetAttr,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
       !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(BBI))
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore noalias because it doesn't affect the call sequence.
  const Function *F = ExitBB->getParent();
  Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
  if (AttrBuilder(CalleeRetAttr).removeAttribute(Attribute::NoAlias) !=
      AttrBuilder(CallerRetAttr).removeAttribute(Attribute::NoAlias))
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
      CallerRetAttr.hasAttribute(Attribute::SExt))
    return false;

  // Otherwise, make sure the unmodified return value of I is the return value.
  // We handle two cases: multiple return values + scalars.
  Value *RetVal = Ret->getOperand(0);
  if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
    // Handle scalars first.
    return getNoopInput(Ret->getOperand(0), TLI) == I;
  
  // If this is an aggregate return, look through the insert/extract values and
  // see if each is transparent.
  for (unsigned i = 0, e = cast<StructType>(RetVal->getType())->getNumElements();
       i != e; ++i) {
    const Value *InScalar = FindInsertedValue(RetVal, i);
    if (InScalar == 0) return false;
    InScalar = getNoopInput(InScalar, TLI);
    
    // If the scalar value being inserted is an extractvalue of the right index
    // from the call, then everything is good.
    const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
    if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
        EVI->getIndices()[0] != i)
      return false;
  }
  
  return true;
}
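
The aggregate-return loop above accepts a call whose struct result reaches the ret only through matching extractvalue/insertvalue pairs. A minimal sketch of that IR shape, built with IRBuilder purely for illustration (the function and variable names are made up):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Illustration only: a caller that forwards a callee's {i32, i32} return
// value to its own ret via extractvalue/insertvalue pairs -- the pattern the
// aggregate-return loop above recognizes as a tail-call position.
static Function *buildAggregateForwarder(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Type *I32 = Type::getInt32Ty(Ctx);
  StructType *Pair = StructType::get(Ctx, {I32, I32});
  FunctionType *FTy = FunctionType::get(Pair, /*isVarArg=*/false);

  Function *Callee =
      Function::Create(FTy, Function::ExternalLinkage, "callee", &M);
  Function *Caller =
      Function::Create(FTy, Function::ExternalLinkage, "caller", &M);

  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", Caller));
  CallInst *CI = B.CreateCall(Callee);              // the candidate tail call
  Value *Agg = UndefValue::get(Pair);
  Agg = B.CreateInsertValue(Agg, B.CreateExtractValue(CI, 0), 0);
  Agg = B.CreateInsertValue(Agg, B.CreateExtractValue(CI, 1), 1);
  B.CreateRet(Agg);                       // the values reach the ret unmodified
  return Caller;
}
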
Example #3
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !I->isSafeToSpeculativelyExecute())
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !BBI->isSafeToSpeculativelyExecute())
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore noalias because it doesn't affect the call sequence.
  const Function *F = ExitBB->getParent();
  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
    return false;

  // Otherwise, make sure the unmodified return value of I is the return value.
  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
       U = dyn_cast<Instruction>(U->getOperand(0))) {
    if (!U)
      return false;
    if (!U->hasOneUse())
      return false;
    if (U == I)
      break;
    // Check for a truly no-op truncate.
    if (isa<TruncInst>(U) &&
        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
      continue;
    // Check for a truly no-op bitcast.
    if (isa<BitCastInst>(U) &&
        (U->getOperand(0)->getType() == U->getType() ||
         (U->getOperand(0)->getType()->isPointerTy() &&
          U->getType()->isPointerTy())))
      continue;
    // Otherwise it's not a true no-op.
    return false;
  }

  return true;
}
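
In this older snapshot the return attributes are still a plain bitmask, so the compatibility test above is bit arithmetic: the XOR exposes any attribute present on exactly one side, and masking out NoAlias forgives that one difference. A stand-alone illustration with made-up bit values (the real constants live in llvm::Attribute at this point in history):

#include <cstdio>

// Illustration only; the bit assignments here are invented.
enum : unsigned { None = 0, ZExt = 1u << 0, SExt = 1u << 1, NoAlias = 1u << 2 };

static bool retAttrsCompatible(unsigned CalleeRet, unsigned CallerRet) {
  // An attribute set on exactly one side, other than NoAlias, is a mismatch.
  return ((CalleeRet ^ CallerRet) & ~NoAlias) == 0;
}

int main() {
  std::printf("%d\n", retAttrsCompatible(NoAlias, None));        // 1: only NoAlias differs
  std::printf("%d\n", retAttrsCompatible(ZExt, None));           // 0: extension promise differs
  std::printf("%d\n", retAttrsCompatible(ZExt | NoAlias, ZExt)); // 1: NoAlias is ignored
}
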
Example #4
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
       !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(BBI))
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  AttrBuilder CallerAttrs(ExitBB->getParent()->getAttributes(),
                          AttributeSet::ReturnIndex);
  AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
                          AttributeSet::ReturnIndex);

  // Noalias is completely benign as far as calling convention goes, it
  // shouldn't affect whether the call is a tail call.
  CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
  CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);

  bool AllowDifferingSizes = true;
  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    AllowDifferingSizes = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    AllowDifferingSizes = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  if (CallerAttrs != CalleeAttrs)
    return false;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing is actually returned, so it doesn't matter what the callee put
  // there; it's a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction; the
      // rest are essentially undef. The type doesn't really matter, but we need
      // *something*.
      Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway, it's more efficient to create a reversed copy.
    using std::copy;
    SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
    copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
    copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));

    // Finally, we can check whether the value produced by the tail call at this
    // index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI))
      return false;

    CallEmpty = !nextRealType(CallSubTypes, CallPath);
  } while (nextRealType(RetSubTypes, RetPath));

  return true;
}
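
The helpers firstRealType, nextRealType and slotOnlyDiscardsData used above are defined earlier in the same file and are not shown here; roughly, the first two enumerate the non-empty scalar slots of a possibly nested aggregate in order. A simplified stand-alone sketch of that idea (not the real incremental implementation):

#include <cstdint>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Simplified illustration only: collect the scalar leaf types of a possibly
// nested aggregate in the order a pairwise ret/call comparison would visit
// them. The real firstRealType/nextRealType walk these slots incrementally
// instead of materializing the whole list.
static void collectLeafTypes(Type *Ty, SmallVectorImpl<Type *> &Leaves) {
  if (StructType *ST = dyn_cast<StructType>(Ty)) {
    for (Type *Elt : ST->elements())
      collectLeafTypes(Elt, Leaves);
    return;
  }
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
    for (uint64_t i = 0, e = AT->getNumElements(); i != e; ++i)
      collectLeafTypes(AT->getElementType(), Leaves);
    return;
  }
  Leaves.push_back(Ty); // a scalar (or vector) slot
}
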