Example #1
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK =
      getOperandInfo(I->getOperand(0));
    TargetTransformInfo::OperandValueKind Op2VK =
      getOperandInfo(I->getOperand(1));
    SmallVector<const Value*, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
                                       Op2VK, TargetTransformInfo::OP_None,
                                       TargetTransformInfo::OP_None,
                                       Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                                SI->getAlignment(),
                                SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                                LI->getAlignment(),
                                LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                             /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                             /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                                   EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst * IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1; 
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                                   IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

    if (NumVecElems == Mask.size()) {
      if (isReverseVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
                                   0, nullptr);
      if (isAlternateVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Alternate,
                                   VecTypOp0, 0, nullptr);

      if (isZeroEltBroadcastVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Broadcast,
                                   VecTypOp0, 0, nullptr);

      if (isSingleSourceVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                   VecTypOp0, 0, nullptr);

      return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                 VecTypOp0, 0, nullptr);
    }

    return -1;
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                        Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
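For context, a minimal usage sketch (assuming the method is reachable from the caller and a TargetTransformInfo instance is in scope; the driver below is illustrative, not part of the original):

// Hypothetical driver: sum the estimated reciprocal throughput of every
// instruction in a function, skipping the -1 "unknown cost" sentinel.
static int getFunctionThroughput(const Function &F,
                                 const TargetTransformInfo &TTI) {
  int Total = 0;
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB) {
      int Cost = TTI.getInstructionThroughput(&I);
      if (Cost >= 0)
        Total += Cost;
    }
  return Total;
}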
Example #2
unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
  if (!TTI)
    return -1;

  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:{
    Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
    return TTI->getAddressComputationCost(ValTy);
  }

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return TTI->getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType());
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
                                 SI->getAlignment(),
                                 SI->getPointerAddressSpace());
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
                                 LI->getAlignment(),
                                 LI->getPointerAddressSpace());
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return TTI->getVectorInstrCost(I->getOpcode(),
                                   EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
      const InsertElementInst * IE = cast<InsertElementInst>(I);
      ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = -1;
      if (CI)
        Idx = CI->getZExtValue();
      return TTI->getVectorInstrCost(I->getOpcode(),
                                     IE->getType(), Idx);
    }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

    if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
      return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
                                 0);
    return -1;
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Type*, 4> Tys;
      for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
        Tys.push_back(II->getArgOperand(J)->getType());

      return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                        Tys);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
Example #3
/// CloneBlock - The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                       std::vector<const BasicBlock*> &ToClone){
  WeakVH &BBEntry = VMap[BB];

  // Have we already cloned this block?
  if (BBEntry) return;
  
  // Nope, clone it now.
  BasicBlock *NewBB;
  BBEntry = NewBB = BasicBlock::Create(BB->getContext());
  if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);

  // It is only legal to clone a function if a block address within that
  // function is never referenced outside of the function.  Given that, we
  // want to map block addresses from the old function to block addresses in
  // the clone. (This is different from the generic ValueMapper
  // implementation, which generates an invalid blockaddress when
  // cloning a function.)
  //
  // Note that we don't need to fix the mapping for unreachable blocks;
  // the default mapping there is safe.
  if (BB->hasAddressTaken()) {
    Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
                                            const_cast<BasicBlock*>(BB));
    VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
  }
    

  bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
  
  // Loop over all instructions, and copy them over, DCE'ing as we go.  This
  // loop doesn't include the terminator.
  for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
       II != IE; ++II) {
    Instruction *NewInst = II->clone();

    // Eagerly remap operands to the newly cloned instruction, except for PHI
    // nodes for which we defer processing until we update the CFG.
    if (!isa<PHINode>(NewInst)) {
      RemapInstruction(NewInst, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);

      // If we can simplify this instruction to some other value, simply add
      // a mapping to that value rather than inserting a new instruction into
      // the basic block.
      if (Value *V = SimplifyInstruction(NewInst, TD)) {
        // On the off-chance that this simplifies to an instruction in the old
        // function, map it back into the new function.
        if (Value *MappedV = VMap.lookup(V))
          V = MappedV;

        VMap[II] = V;
        delete NewInst;
        continue;
      }
    }

    if (II->hasName())
      NewInst->setName(II->getName()+NameSuffix);
    VMap[II] = NewInst;                // Add instruction map to value.
    NewBB->getInstList().push_back(NewInst);
    hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
      if (isa<ConstantInt>(AI->getArraySize()))
        hasStaticAllocas = true;
      else
        hasDynamicAllocas = true;
    }
  }
  
  // Finally, clone over the terminator.
  const TerminatorInst *OldTI = BB->getTerminator();
  bool TerminatorDone = false;
  if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
    if (BI->isConditional()) {
      // If the condition was a known constant in the callee...
      ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
      // Or is a known constant in the caller...
      if (Cond == 0) {
        Value *V = VMap[BI->getCondition()];
        Cond = dyn_cast_or_null<ConstantInt>(V);
      }

      // Constant fold to uncond branch!
      if (Cond) {
        BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
        VMap[OldTI] = BranchInst::Create(Dest, NewBB);
        ToClone.push_back(Dest);
        TerminatorDone = true;
      }
    }
  } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
    // If switching on a value known constant in the caller.
    ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
    if (Cond == 0) { // Or known constant after constant prop in the callee...
      Value *V = VMap[SI->getCondition()];
      Cond = dyn_cast_or_null<ConstantInt>(V);
    }
    if (Cond) {     // Constant fold to uncond branch!
      SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
      BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
      VMap[OldTI] = BranchInst::Create(Dest, NewBB);
      ToClone.push_back(Dest);
      TerminatorDone = true;
    }
  }
  
  if (!TerminatorDone) {
    Instruction *NewInst = OldTI->clone();
    if (OldTI->hasName())
      NewInst->setName(OldTI->getName()+NameSuffix);
    NewBB->getInstList().push_back(NewInst);
    VMap[OldTI] = NewInst;             // Add instruction map to value.
    
    // Recursively clone any reachable successor blocks.
    const TerminatorInst *TI = BB->getTerminator();
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      ToClone.push_back(TI->getSuccessor(i));
  }
  
  if (CodeInfo) {
    CodeInfo->ContainsCalls          |= hasCalls;
    CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && 
      BB != &BB->getParent()->front();
  }
  
  if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
    Returns.push_back(RI);
}
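CloneBlock only queues reachable successors; a hedged sketch of the worklist loop that typically drives it (modeled on CloneAndPruneFunctionInto; the helper name is made up):

// Hypothetical driver: clone the entry block, then keep cloning
// whatever the already-cloned blocks can reach. Blocks that are never
// queued are unreachable and are simply dropped.
static void cloneReachableBlocks(PruningFunctionCloner &PFC,
                                 const Function *OldFunc) {
  std::vector<const BasicBlock *> CloneWorklist;
  CloneWorklist.push_back(&OldFunc->getEntryBlock());
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, CloneWorklist);
  }
}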
Example #4
// If we can determine that all possible objects pointed to by the provided
// pointer value are, not only dereferenceable, but also definitively less than
// or equal to the provided maximum size, then return true. Otherwise, return
// false. (Only pointers built up from allocas and constant globals, possibly
// through selects, PHIs and resolvable aliases, can currently be shown to
// satisfy this.)
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
                                     const DataLayout &DL) {
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist(1, V);

  do {
    Value *P = Worklist.pop_back_val();
    P = P->stripPointerCasts();

    if (!Visited.insert(P).second)
      continue;

    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(P)) {
      for (Value *IncValue : PN->incoming_values())
        Worklist.push_back(IncValue);
      continue;
    }

    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
      if (GA->mayBeOverridden())
        return false;
      Worklist.push_back(GA->getAliasee());
      continue;
    }

    // If we know how big this object is, and it is less than MaxSize, continue
    // searching. Otherwise, return false.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
      if (!AI->getAllocatedType()->isSized())
        return false;

      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
      if (!CS)
        return false;

      uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
      // Make sure that, even if the multiplication below would wrap as an
      // uint64_t, we still do the right thing.
      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
        return false;
      continue;
    }

    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
        return false;

      uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
      if (InitSize > MaxSize)
        return false;
      continue;
    }

    return false;
  } while (!Worklist.empty());

  return true;
}
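A short hedged illustration of how a caller might use the check (the helper name and size threshold are invented for the sketch):

// Hypothetical guard: only fire a transform when the object behind Ptr
// is provably small, whatever path the pointer took through selects,
// PHIs and non-overridable aliases.
static bool isSmallObject(Value *Ptr, const DataLayout &DL) {
  const uint64_t MaxObjectSize = 64; // arbitrary threshold for the sketch
  return isObjectSizeLessThanOrEq(Ptr, MaxObjectSize, DL);
}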
Example #5
static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
                                                  LVILatticeVal &Result,
                                                  const DataLayout &DL,
                                                  TargetLibraryInfo *TLI) {

  // If we know the value is a constant, evaluate the conditional.
  Constant *Res = nullptr;
  if (Result.isConstant()) {
    Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
                                          TLI);
    if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
      return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
    return LazyValueInfo::Unknown;
  }

  if (Result.isConstantRange()) {
    ConstantInt *CI = dyn_cast<ConstantInt>(C);
    if (!CI) return LazyValueInfo::Unknown;

    ConstantRange CR = Result.getConstantRange();
    if (Pred == ICmpInst::ICMP_EQ) {
      if (!CR.contains(CI->getValue()))
        return LazyValueInfo::False;

      if (CR.isSingleElement() && CR.contains(CI->getValue()))
        return LazyValueInfo::True;
    } else if (Pred == ICmpInst::ICMP_NE) {
      if (!CR.contains(CI->getValue()))
        return LazyValueInfo::True;

      if (CR.isSingleElement() && CR.contains(CI->getValue()))
        return LazyValueInfo::False;
    }

    // Handle more complex predicates.
    ConstantRange TrueValues =
        ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
    if (TrueValues.contains(CR))
      return LazyValueInfo::True;
    if (TrueValues.inverse().contains(CR))
      return LazyValueInfo::False;
    return LazyValueInfo::Unknown;
  }

  if (Result.isNotConstant()) {
    // If this is an equality comparison, we can try to fold it knowing that
    // "V != C1".
    if (Pred == ICmpInst::ICMP_EQ) {
      // !C1 == C -> false iff C1 == C.
      Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
                                            Result.getNotConstant(), C, DL,
                                            TLI);
      if (Res->isNullValue())
        return LazyValueInfo::False;
    } else if (Pred == ICmpInst::ICMP_NE) {
      // !C1 != C -> true iff C1 == C.
      Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
                                            Result.getNotConstant(), C, DL,
                                            TLI);
      if (Res->isNullValue())
        return LazyValueInfo::True;
    }
    return LazyValueInfo::Unknown;
  }

  return LazyValueInfo::Unknown;
}
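A small self-contained illustration of the constant-range arm above (widths and bounds chosen arbitrarily):

// Illustrative only: suppose the lattice proved V lies in [0, 10).
static void constantRangeExample() {
  ConstantRange CR(APInt(32, 0), APInt(32, 10));

  // "V == 42" folds to False: 42 lies outside the known range.
  assert(!CR.contains(APInt(32, 42)));

  // "V u< 10" folds to True: the set of values satisfying the predicate
  // is exactly [0, 10), which contains the entire known range.
  ConstantRange TrueValues =
      ICmpInst::makeConstantRange(ICmpInst::ICMP_ULT, APInt(32, 10));
  assert(TrueValues.contains(CR));
}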
Example #6
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
/// copy from MDep's input if we can.  MSize is the size of M's copy.
/// 
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
                                              uint64_t MSize) {
  // We can only transform memcpys where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
    return false;
  
  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction.  Just
  // ignore the input and let someone else zap MDep.  This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;
  
  // Second, the lengths of the two memcpys must be the same, or the preceding
  // one must be larger than the following one.
  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
    return false;
  
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Verify that the copied-from memory doesn't change in between the two
  // transfers.  For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination "c",
  // then we could still perform the xform by moving M up to the first memcpy.
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
    MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
                                 false, M, M->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;
  
  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap.  We still want to eliminate the intermediate
  // value, but we have to generate a memmove instead of memcpy.
  bool UseMemMove = false;
  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
    UseMemMove = true;
  
  // If all checks passed, then we can transform M.
  
  // Make sure to use the lesser of the alignment of the source and the dest
  // since we're changing where we're reading from, but don't want to increase
  // the alignment past what can be read from or written to.
  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
  
  IRBuilder<> Builder(M);
  if (UseMemMove)
    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                          Align, M->isVolatile());
  else
    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                         Align, M->isVolatile());

  // Remove the instruction we're replacing.
  MD->removeInstruction(M);
  M->eraseFromParent();
  ++NumMemCpyInstr;
  return true;
}
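For orientation, a hedged sketch of the call site that reaches this helper (modeled on MemCpyOpt::processMemCpy; the wrapper name is made up):

// Hypothetical caller: the memory-dependence walk found that M's source
// bytes were last written by another memcpy, so try to forward through
// that earlier copy.
bool MemCpyOpt::tryForwardThroughDep(MemCpyInst *M) {
  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
  MemDepResult DepInfo = MD->getDependency(M);
  if (CopySize && DepInfo.isClobber())
    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
      return processMemCpyMemCpyDependence(M, MDep,
                                           CopySize->getZExtValue());
  return false;
}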
Example #7
bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    LLVM_DEBUG(
        dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());

  BasicBlock *Latch = AR->getLoop()->getLoopLatch();
  if (!Latch)
    return false;
  BinaryOperator *BOp =
      dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));

  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
                            CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
  // sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
  return true;
}
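A hedged sketch of a typical scan over a loop header using this entry point (nulls passed for the optional SCEV and cast list; the helper name is invented):

// Hypothetical scan: classify every PHI in the loop header; on success
// the descriptor D carries the start value, induction kind and step.
static void findInductions(Loop *TheLoop, ScalarEvolution *SE) {
  for (PHINode &Phi : TheLoop->getHeader()->phis()) {
    InductionDescriptor D;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, SE, D,
                                            /*Expr=*/nullptr,
                                            /*CastsToIgnore=*/nullptr))
      LLVM_DEBUG(dbgs() << "Induction PHI: " << Phi << "\n");
  }
}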
Example #8
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
  Optional<AllocFnsTy> FnData =
      getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
  if (!FnData)
    return unknown();

  // handle strdup-like functions separately
  if (FnData->AllocTy == StrDupLike) {
    APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
    if (!Size)
      return unknown();

    // strndup limits strlen
    if (FnData->FstParam > 0) {
      ConstantInt *Arg =
          dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
      if (!Arg)
        return unknown();

      APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
      if (Size.ugt(MaxSize))
        Size = MaxSize + 1;
    }
    return std::make_pair(Size, Zero);
  }

  ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
  if (!Arg)
    return unknown();

  // When we're compiling N-bit code, and the user uses parameters that are
  // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
  // trouble with APInt size issues. This function handles resizing + overflow
  // checks for us.
  auto CheckedZextOrTrunc = [&](APInt &I) {
    // More bits than we can handle. Checking the bit width isn't necessary, but
    // it's faster than checking active bits, and should give `false` in the
    // vast majority of cases.
    if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
      return false;
    if (I.getBitWidth() != IntTyBits)
      I = I.zextOrTrunc(IntTyBits);
    return true;
  };

  APInt Size = Arg->getValue();
  if (!CheckedZextOrTrunc(Size))
    return unknown();

  // size determined by just 1 parameter
  if (FnData->SndParam < 0)
    return std::make_pair(Size, Zero);

  Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam));
  if (!Arg)
    return unknown();

  APInt NumElems = Arg->getValue();
  if (!CheckedZextOrTrunc(NumElems))
    return unknown();

  bool Overflow;
  Size = Size.umul_ov(NumElems, Overflow);
  return Overflow ? unknown() : std::make_pair(Size, Zero);

  // TODO: handle more standard functions (+ wchar cousins):
  // - strdup / strndup
  // - strcpy / strncpy
  // - strcat / strncat
  // - memcpy / memmove
  // - memset
}
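The CheckedZextOrTrunc lambda above exists so that a constant wider than the index type is rejected rather than silently truncated; a standalone illustration (widths chosen arbitrarily):

// Illustrative only: guarded narrowing with an assumed 32-bit index type.
static void checkedNarrowExample() {
  const unsigned IntTyBits = 32;      // assumed index-type width

  APInt Fits(64, 1000);               // active bits <= 32
  APInt TooBig(64, 1ULL << 40);       // active bits > 32

  // Fits narrows losslessly to 32 bits ...
  assert(Fits.getActiveBits() <= IntTyBits &&
         Fits.zextOrTrunc(IntTyBits).getZExtValue() == 1000);
  // ... while TooBig is exactly the case the lambda rejects by
  // returning false instead of truncating to a wrong value.
  assert(TooBig.getActiveBits() > IntTyBits);
}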
Example #9
int qdp_jit_vec::vectorize_loads( std::vector<std::vector<Instruction*> >& load_instructions )
{
  DEBUG(dbgs() << "Vectorize loads, total of " << load_instructions.size() << "\n");

  //std::vector<std::pair<Value*,Value*> > scalar_vector_loads;
  scalar_vector_pairs.clear();

  if (load_instructions.empty())
    return 0;

  int load_vec_elem = 0;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    DEBUG(dbgs() << "Processing vector of loads number " << load_vec_elem++ << "\n");
    assert( VI.size() == vec_len && "length of vector of loads does not match vec_len" );
    bool loads_consec = true;
    uint64_t lo,hi;
    bool first = true;
    for( Instruction* I : VI ) {
      GetElementPtrInst* GEP;
      if ((GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0)))) {
	if (first) {
	  ConstantInt * CI;
	  if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
	    lo = CI->getZExtValue();
	    hi = lo+1;
	    first=false;
	  } else {
	    DEBUG(dbgs() << "First load in the chain: Operand of GEP not a ConstantInt" << *GEP->getOperand(1) << "\n");
	    assert( 0 && "First load in the chain: Operand of GEP not a ConstantInt\n");
	    exit(0);
	  }
	} else {
	  ConstantInt * CI;
	  if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
	    if (hi != CI->getZExtValue()) {
	      DEBUG(dbgs() << "Loads not consecutive lo=" << lo << " hi=" << hi << " this=" << CI->getZExtValue() << "\n");
	      loads_consec = false;
	    } else {
	      hi++;
	    }
	  }
	}
      } else {
	DEBUG(dbgs() << "Operand of load not a GEP " << *I->getOperand(0) << "\n");
	assert( 0 && "Operand of load not a GEP" );
	exit(0);
	loads_consec = false;
      }
    }
    if (loads_consec) {
      DEBUG(dbgs() << "Loads consecuetive\n");

      LoadInst* LI = cast<LoadInst>(VI.at(0));
      GetElementPtrInst* GEP = cast<GetElementPtrInst>(LI->getOperand(0));
      Instruction* GEPcl = clone_with_operands(GEP);
      unsigned AS = LI->getPointerAddressSpace();
      VectorType *VecTy = VectorType::get( LI->getType() , vec_len );

      unsigned bitwidth = LI->getType()->getPrimitiveSizeInBits();
      unsigned bytewidth = bitwidth == 1 ? 1 : bitwidth/8;
      DEBUG(dbgs() << "bit/byte width of load instr trype: " << bitwidth << "/" << bytewidth << "\n");
 
      //Builder->SetInsertPoint( GEP );
      Value *VecPtr = Builder->CreateBitCast(GEPcl,VecTy->getPointerTo(AS));
      //Value *VecLoad = Builder->CreateLoad( VecPtr );
      
      unsigned align = lo % vec_len == 0 ? bytewidth * vec_len : bytewidth;
      Value *VecLoad = Builder->CreateAlignedLoad( VecPtr , align );

      //DEBUG(dbgs() << "created vector load: " << *VecLoad << "\n");
      //function->dump();

      // unsigned AS = LI->getPointerAddressSpace();
      // VectorType *VecTy = VectorType::get( LI->getType() , vec_len );
      // Builder->SetInsertPoint( LI );
      // Value *VecPtr = Builder->CreateBitCast(LI->getPointerOperand(),VecTy->getPointerTo(AS));
      // Value *VecLoad = Builder->CreateLoad( VecPtr );

      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , VecLoad ) );
    } else {
      DEBUG(dbgs() << "Loads not consecutive:\n");
      for (Value* V: VI) {
	DEBUG(dbgs() << *V << "\n");
      }

      //Instruction* I = dyn_cast<Instruction>(VI.back()->getNextNode());
      //DEBUG(dbgs() << *I << "\n");

      //Builder->SetInsertPoint( VI.at(0) );


      std::vector<Instruction*> VIcl;
      for( Instruction* I : VI ) {
	VIcl.push_back( clone_with_operands(I) );
      }

      VectorType *VecTy = VectorType::get( VI.at(0)->getType() , vec_len );
      Value *Vec = UndefValue::get(VecTy);

      int i=0;
      for( Instruction* I : VIcl ) {
	Vec = Builder->CreateInsertElement(Vec, I, Builder->getInt32(i++));
      }

      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , Vec ) );      
    }
  }

  //vectorize_all_uses( scalar_vector_loads );

  DEBUG(dbgs() << "Searching for the stores:\n");
  //function->dump();

  //
  // Vectorize all StoreInst reachable by the first load of each vector of loads
  //
  {
    SetVector<Instruction*> to_visit;
    SetVector<Instruction*> stores_processed;
    for( std::vector<Instruction*>& VI : load_instructions ) {
      to_visit.insert(VI.at(0));
    }
    while (!to_visit.empty()) {
      Instruction* I = to_visit.back();
      to_visit.pop_back();
      DEBUG(dbgs() << "visiting " << *I << "\n");
      if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
	if (!stores_processed.count(SI)) {
	  get_vector_version( SI );
	  stores_processed.insert( SI );
	}
      } else {
	for (Use &U : I->uses()) {
	  Value* V = U.getUser();
	  to_visit.insert(cast<Instruction>(V));
	}
      }
    }
  }

  // DEBUG(dbgs() << "After vectorizing the stores\n");
  // function->dump();

  //
  // Mark all stores as being processed
  //
  SetVector<Instruction*> to_visit;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    for( Instruction* I : VI ) {
      to_visit.insert(I);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0))) {
	for_erasure.insert(GEP);
      }
    }
  }
  while (!to_visit.empty()) {
    Instruction* I = to_visit.back();
    to_visit.pop_back();
    for_erasure.insert(I);
    if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
      stores_processed.insert(SI);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(SI->getOperand(1))) {
	for_erasure.insert(GEP);
      }
    } else {
      for (Use &U : I->uses()) {
	Value* V = U.getUser();
	to_visit.insert(cast<Instruction>(V));
      }
    }
  }
  
  DEBUG(dbgs() << "----------------------------------------\n");
  DEBUG(dbgs() << "After vectorize_loads\n");
  //function->dump();

  return 0;
}
Example #10
/// CanEvaluateShifted - See if we can compute the specified value, but shifted
/// logically to the left or right by some number of bits.  This should return
/// true if the expression can be computed for the same cost as the current
/// expression tree.  This is used to eliminate extraneous shifting from things
/// like:
///      %C = shl i128 %A, 64
///      %D = shl i128 %B, 96
///      %E = or i128 %C, %D
///      %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64-bits.  If
/// this succeeds, the GetShiftedValue function will be called to produce the
/// value.
static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                               InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (isa<Constant>(V))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // If this is the opposite shift, we can directly reuse the input of the shift
  // if the needed bits are already zero in the input.  This allows us to reuse
  // the value which means that we don't care if the shift has multiple uses.
  //  TODO:  Handle opposite shift by exact value.
  ConstantInt *CI = 0;
  if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
      (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
    if (CI->getZExtValue() == NumBits) {
      // TODO: Check that the input bits are already zero with MaskedValueIsZero
#if 0
      // If this is a truncate of a logical shr, we can truncate it to a smaller
      // lshr iff we know that the bits we would otherwise be shifting in are
      // already zeros.
      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
      uint32_t BitWidth = Ty->getScalarSizeInBits();
      if (MaskedValueIsZero(I->getOperand(0),
            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
          CI->getLimitedValue(BitWidth) < BitWidth) {
        return CanEvaluateTruncated(I->getOperand(0), Ty);
      }
#endif

    }
  }

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted arbitrarily.
    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);

  case Instruction::Shl: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) return true;

    // We can always turn shl(c)+shr(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getZExtValue() > NumBits) {
      unsigned LowBits = TypeWidth - CI->getZExtValue();
      if (MaskedValueIsZero(I->getOperand(0),
                       APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::LShr: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) return true;

    // We can always turn lshr(c)+shl(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
      unsigned LowBits = CI->getZExtValue() - NumBits;
      if (MaskedValueIsZero(I->getOperand(0),
                          APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
        return false;
    return true;
  }
  }
}
Example #11
bool FastISel::SelectCall(const User *I) {
  const CallInst *Call = cast<CallInst>(I);

  // Handle simple inline asms.
  if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
    // Don't attempt to handle constraints.
    if (!IA->getConstraintString().empty())
      return false;

    unsigned ExtraInfo = 0;
    if (IA->hasSideEffects())
      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
    if (IA->isAlignStack())
      ExtraInfo |= InlineAsm::Extra_IsAlignStack;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(TargetOpcode::INLINEASM))
      .addExternalSymbol(IA->getAsmString().c_str())
      .addImm(ExtraInfo);
    return true;
  }

  MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
  ComputeUsesVAFloatArgument(*Call, &MMI);

  const Function *F = Call->getCalledFunction();
  if (!F) return false;

  // Handle selected intrinsic function calls.
  switch (F->getIntrinsicID()) {
  default: break;
    // At -O0 we don't care about the lifetime intrinsics.
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    return true;
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
    if (!DIVariable(DI->getVariable()).Verify() ||
        !FuncInfo.MF->getMMI().hasDebugInfo()) {
      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      return true;
    }

    const Value *Address = DI->getAddress();
    if (!Address || isa<UndefValue>(Address)) {
      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      return true;
    }

    unsigned Reg = 0;
    unsigned Offset = 0;
    if (const Argument *Arg = dyn_cast<Argument>(Address)) {
      // Some arguments' frame index is recorded during argument lowering.
      Offset = FuncInfo.getArgumentFrameIndex(Arg);
      if (Offset)
        Reg = TRI.getFrameRegister(*FuncInfo.MF);
    }
    if (!Reg)
      Reg = lookUpRegForValue(Address);

    if (!Reg && isa<Instruction>(Address) &&
        (!isa<AllocaInst>(Address) ||
         !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
      Reg = FuncInfo.InitializeRegForValue(Address);

    if (Reg)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
              TII.get(TargetOpcode::DBG_VALUE))
        .addReg(Reg, RegState::Debug).addImm(Offset)
        .addMetadata(DI->getVariable());
    else
      // We can't yet handle anything else here because it would require
      // generating code, thus altering codegen because of debug info.
      DEBUG(dbgs() << "Dropping debug info for " << DI);
    return true;
  }
  case Intrinsic::dbg_value: {
    // This form of DBG_VALUE is target-independent.
    const DbgValueInst *DI = cast<DbgValueInst>(Call);
    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    const Value *V = DI->getValue();
    if (!V) {
      // Currently the optimizer can produce this; insert an undef to
      // help debugging.  Probably the optimizer should not do this.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
        .addReg(0U).addImm(DI->getOffset())
        .addMetadata(DI->getVariable());
    } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
      if (CI->getBitWidth() > 64)
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
          .addCImm(CI).addImm(DI->getOffset())
          .addMetadata(DI->getVariable());
      else 
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
          .addImm(CI->getZExtValue()).addImm(DI->getOffset())
          .addMetadata(DI->getVariable());
    } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
        .addFPImm(CF).addImm(DI->getOffset())
        .addMetadata(DI->getVariable());
    } else if (unsigned Reg = lookUpRegForValue(V)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
        .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
        .addMetadata(DI->getVariable());
    } else {
      // We can't yet handle anything else here because it would require
      // generating code, thus altering codegen because of debug info.
      DEBUG(dbgs() << "Dropping debug info for " << DI);
    }
    return true;
  }
  case Intrinsic::objectsize: {
    ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
    unsigned long long Res = CI->isZero() ? -1ULL : 0;
    Constant *ResCI = ConstantInt::get(Call->getType(), Res);
    unsigned ResultReg = getRegForValue(ResCI);
    if (ResultReg == 0)
      return false;
    UpdateValueMap(Call, ResultReg);
    return true;
  }
  }

  // Usually, it does not make sense to initialize a value,
  // make an unrelated function call and use the value, because
  // it tends to be spilled on the stack. So, we move the pointer
  // to the last local value to the beginning of the block, so that
  // all the values which have already been materialized
  // appear after the call. It also makes sense to skip intrinsics
  // since they tend to be inlined.
  if (!isa<IntrinsicInst>(F))
    flushLocalValueMap();

  // An arbitrary call. Bail.
  return false;
}
Example #12
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      V = IC.Builder->CreateShl(C, NumBits);
    else
      V = IC.Builder->CreateLShr(C, NumBits);
    // If we got a constantexpr back, try to simplify it with TD info.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
                                         IC.getTargetLibraryInfo());
    return V;
  }

  Instruction *I = cast<Instruction>(V);
  IC.Worklist.Add(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted arbitrarily.
    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    return I;

  case Instruction::Shl: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();

    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) {
      // If this is oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(I->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setHasNoUnsignedWrap(false);
      BO->setHasNoSignedWrap(false);
      return I;
    }

    // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(BO->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(BO);
        VI->takeName(BO);
      }
      return V;
    }

    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setHasNoUnsignedWrap(false);
    BO->setHasNoSignedWrap(false);
    return BO;
  }
  case Instruction::LShr: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) {
      // If this is oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(BO->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setIsExact(false);
      return I;
    }

    // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(I->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(I);
        VI->takeName(I);
      }
      return V;
    }

    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setIsExact(false);
    return BO;
  }

  case Instruction::Select:
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
                                              NumBits, isLeftShift, IC));
    return PN;
  }
  }
}
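Examples #10 and #12 are used as a pair; a hedged sketch of the typical call site (modeled on InstCombine's shift-by-constant folding; I is the shift, Op0 its shifted operand, IC the combiner):

// Hypothetical call site: if the whole tree feeding Op0 can absorb the
// shift for free, rewrite the tree in place and drop the shift.
if (ConstantInt *COp1 = dyn_cast<ConstantInt>(I.getOperand(1)))
  if (CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, IC))
    return IC.ReplaceInstUsesWith(
        I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, IC));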
Example #13
/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
                                unsigned Depth = 5) {
  // We can always reorder the elements of a constant.
  if (isa<Constant>(V))
    return true;

  // We won't reorder vector arguments. No IPO here.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Two users may expect different orders of the elements. Don't try it.
  if (!I->hasOneUse())
    return false;

  if (Depth == 0) return false;

  switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::GetElementPtr: {
      for (Value *Operand : I->operands()) {
        if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
          return false;
      }
      return true;
    }
    case Instruction::InsertElement: {
      ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
      if (!CI) return false;
      int ElementNumber = CI->getLimitedValue();

      // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
      // can't put an element into multiple indices.
      bool SeenOnce = false;
      for (int i = 0, e = Mask.size(); i != e; ++i) {
        if (Mask[i] == ElementNumber) {
          if (SeenOnce)
            return false;
          SeenOnce = true;
        }
      }
      return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
    }
  }
  return false;
}
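A hedged sketch of how a shufflevector combine might consume this predicate; EvaluateInDifferentElementOrder is assumed to be the companion routine that actually rewrites the leaves:

// Hypothetical call site inside visitShuffleVectorInst: if the operand
// tree can be evaluated with its elements permuted, permute once at the
// leaves and replace the shuffle with the rewritten value.
if (CanEvaluateShuffled(LHS, Mask))
  return ReplaceInstUsesWith(SVI, EvaluateInDifferentElementOrder(LHS, Mask));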
Example #14
/// \brief Check if Value is always a dereferenceable pointer.
///
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
                                     SmallPtrSetImpl<const Value *> &Visited) {
  // Note that it is not safe to speculate into a malloc'd region because
  // malloc may return null.

  // These are obviously ok.
  if (isa<AllocaInst>(V)) return true;

  // It's not always safe to follow a bitcast, for example:
  //   bitcast i8* (alloca i8) to i32*
  // would result in a 4-byte load from a 1-byte alloca. However,
  // if we're casting from a pointer from a type of larger size
  // to a type of smaller size (or the same size), and the alignment
  // is at least as large as for the resulting pointer type, then
  // we can look through the bitcast.
  if (DL)
    if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) {
      Type *STy = BC->getSrcTy()->getPointerElementType(),
           *DTy = BC->getDestTy()->getPointerElementType();
      if (STy->isSized() && DTy->isSized() &&
          (DL->getTypeStoreSize(STy) >=
           DL->getTypeStoreSize(DTy)) &&
          (DL->getABITypeAlignment(STy) >=
           DL->getABITypeAlignment(DTy)))
        return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
    }

  // Global variables which can't collapse to null are ok.
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return !GV->hasExternalWeakLinkage();

  // byval arguments are okay. Arguments specifically marked as
  // dereferenceable are okay too.
  if (const Argument *A = dyn_cast<Argument>(V)) {
    if (A->hasByValAttr())
      return true;
    else if (uint64_t Bytes = A->getDereferenceableBytes()) {
      Type *Ty = V->getType()->getPointerElementType();
      if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
        return true;
    }

    return false;
  }

  // Return values from call sites specifically marked as dereferenceable are
  // also okay.
  if (ImmutableCallSite CS = V) {
    if (uint64_t Bytes = CS.getDereferenceableBytes(0)) {
      Type *Ty = V->getType()->getPointerElementType();
      if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
        return true;
    }
  }

  // For GEPs, determine if the indexing lands within the allocated object.
  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
    // Conservatively require that the base pointer be fully dereferenceable.
    if (!Visited.insert(GEP->getOperand(0)).second)
      return false;
    if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
      return false;
    // Check the indices.
    gep_type_iterator GTI = gep_type_begin(GEP);
    for (User::const_op_iterator I = GEP->op_begin()+1,
         E = GEP->op_end(); I != E; ++I) {
      Value *Index = *I;
      Type *Ty = *GTI++;
      // Struct indices can't be out of bounds.
      if (isa<StructType>(Ty))
        continue;
      ConstantInt *CI = dyn_cast<ConstantInt>(Index);
      if (!CI)
        return false;
      // Zero is always ok.
      if (CI->isZero())
        continue;
      // Check to see that it's within the bounds of an array.
      ArrayType *ATy = dyn_cast<ArrayType>(Ty);
      if (!ATy)
        return false;
      if (CI->getValue().getActiveBits() > 64)
        return false;
      if (CI->getZExtValue() >= ATy->getNumElements())
        return false;
    }
    // Indices check out; this is dereferenceable.
    return true;
  }

  if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
    return isDereferenceablePointer(ASC->getOperand(0), DL, Visited);

  // If we don't know, assume the worst.
  return false;
}
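The recursive form above is normally reached through a thin public wrapper that seeds the Visited set; a sketch of the assumed entry point:

/// Assumed public entry point: start the walk with an empty set so the
/// recursion can detect cycles through GEP base pointers.
bool isDereferenceablePointer(const Value *V, const DataLayout *DL) {
  SmallPtrSet<const Value *, 32> Visited;
  return ::isDereferenceablePointer(V, DL, Visited);
}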
Example #15
void TartGCPrinter::finishAssembly(AsmPrinter &AP) {
    unsigned nextLabel = 1;
    SafePointList safePoints;

    // Set up for emitting addresses.
    int pointerSize = AP.TM.getTargetData()->getPointerSize();
    int addressAlignLog;
    if (pointerSize == sizeof(int32_t)) {
        addressAlignLog = 2;
    } else {
        addressAlignLog = 3;
    }

    MCStreamer & outStream = AP.OutStreamer;

    // Put this in the data section.
    outStream.SwitchSection(AP.getObjFileLowering().getDataSection());

    // For each function...
    for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
        GCFunctionInfo & gcFn = **FI;

//    if (optShowGC) {
//      errs() << "GCStrategy: Function: " << gcFn.getFunction().getName() << "\n";
//    }

        // And each safe point...
        for (GCFunctionInfo::iterator sp = gcFn.begin(); sp != gcFn.end(); ++sp) {
            StackTraceTable::FieldOffsetList fieldOffsets;
            StackTraceTable::TraceMethodList traceMethods;

            // And for each live root...
            for (GCFunctionInfo::live_iterator rt = gcFn.live_begin(sp); rt != gcFn.live_end(sp); ++rt) {
                int64_t offset = rt->StackOffset;
                const Constant * meta = rt->Metadata;

                if (meta != NULL && !meta->isNullValue()) {
                    // Meta is non-null, so it's a value type.
                    const ConstantArray * traceArray = cast<ConstantArray>(getGlobalValue(meta));

                    // For each trace descriptor in the meta array...
                    for (ConstantArray::const_op_iterator it = traceArray->op_begin();
                            it != traceArray->op_end(); ++it) {
                        ConstantStruct * descriptor = cast<ConstantStruct>(*it);
                        ConstantInt * fieldCount = cast<ConstantInt>(descriptor->getOperand(1));
                        int64_t dscOffset = toInt(descriptor->getOperand(2), AP.TM);

                        if (fieldCount->isZero()) {
                            // A zero field count means that this is a trace method descriptor.
                            const Constant * traceMethod = descriptor->getOperand(3);
                            assert(offset > -1000 && offset < 1000);
                            assert(dscOffset > -1000 && dscOffset < 1000);
                            traceMethods.push_back(TraceMethodEntry(offset + dscOffset, traceMethod));
                        } else {
                            // Otherwise it's a field offset descriptor.
                            const GlobalVariable * fieldOffsetsVar = cast<GlobalVariable>(
                                        descriptor->getOperand(3)->getOperand(0));

                            // Handle case where the array value is just a ConstantAggregateZero, which
                            // can be generated by llvm::ConstantArray::get() if the array values
                            // are all zero.
                            if (const ConstantAggregateZero * zero =
                                        dyn_cast<ConstantAggregateZero>(fieldOffsetsVar->getInitializer())) {
                                // Array should never contain duplicate offsets, so an all-zero array
                                // can only have one entry.
                                (void)zero;
                                assert(fieldCount->isOne());
                                fieldOffsets.push_back(offset + dscOffset);
                            } else {
                                // Get the field offset array and add to field offsets for this
                                // safe point.
                                const ConstantArray * fieldOffsetArray = cast<ConstantArray>(
                                            fieldOffsetsVar->getInitializer());
                                for (ConstantArray::const_op_iterator el = fieldOffsetArray->op_begin();
                                        el != fieldOffsetArray->op_end(); ++el) {
                                    fieldOffsets.push_back(
                                        offset + dscOffset + toInt(cast<llvm::Constant>(*el), AP.TM));
                                }
                            }
                        }
                    }
                } else {
                    // No metadata, so it's an object reference - just add the field offset.
                    fieldOffsets.push_back(offset);
                }
            }

            // Nothing to trace? Then we're done.
            if (fieldOffsets.empty() && traceMethods.empty()) {
                continue;
            }

            // Create a folding set node and merge with any identical trace tables.
            std::sort(fieldOffsets.begin(), fieldOffsets.end());
            llvm::FoldingSetNodeID id;
            StackTraceTable::ProfileEntries(id, fieldOffsets, traceMethods);

            void * insertPos;
            StackTraceTable * sTable = traceTables.FindNodeOrInsertPos(id, insertPos);
            if (sTable == NULL) {
                sTable = new StackTraceTable(fieldOffsets, traceMethods);

                // Generate the labels for the trace table and field offset table.
                sTable->fieldOffsetsLabel = AP.GetTempSymbol("gc_stack_offsets", nextLabel);
                sTable->traceTableLabel = AP.GetTempSymbol("gc_stack", nextLabel++);

                // Add to folding set
                traceTables.InsertNode(sTable, insertPos);

                // Generate the trace table
                outStream.AddBlankLine();
                AP.EmitAlignment(addressAlignLog);

                // First the field offset descriptor
                outStream.EmitLabel(sTable->traceTableLabel);
                size_t traceMethodCount = sTable->traceMethods.size();
                if (!sTable->fieldOffsets.empty()) {
                    outStream.EmitIntValue(traceMethodCount == 0 ? 1 : 0, 2, 0);
                    outStream.EmitIntValue(sTable->fieldOffsets.size(), 2, 0);
                    outStream.EmitIntValue(0, 4, 0);
                    outStream.EmitSymbolValue(sTable->fieldOffsetsLabel, pointerSize, 0);
                }

                // Next the trace method descriptors
                for (size_t i = 0; i < traceMethodCount; ++i) {
                    const TraceMethodEntry * tm = &sTable->traceMethods[i];
                    const Function * method = dyn_cast<Function>(tm->method());
                    if (method == NULL) {
                        method = cast<Function>(tm->method()->getOperand(0));
                    }

                    outStream.EmitIntValue((i + 1 == traceMethodCount ? 1 : 0), 2, 0);
                    outStream.EmitIntValue(0, 2, 0);
                    outStream.EmitIntValue(tm->offset(), 4, 0);
                    MCSymbol * methodSym = AP.Mang->getSymbol(method);
                    outStream.EmitSymbolValue(methodSym, pointerSize, 0);

                }

                // Now emit the field offset array
                outStream.AddBlankLine();
                AP.EmitAlignment(addressAlignLog);

                outStream.EmitLabel(sTable->fieldOffsetsLabel);
                for (StackTraceTable::FieldOffsetList::const_iterator it = fieldOffsets.begin();
                        it != fieldOffsets.end(); ++it) {
                    outStream.EmitIntValue(*it, pointerSize, 0);
                }
            }

            safePoints.push_back(std::pair<MCSymbol *, MCSymbol *>(sp->Label, sTable->traceTableLabel));
//      if (optShowGC) {
//        if (!sTable->fieldOffsets.empty()) {
//          errs() << "GCStrategy:   Field offset descriptor:";
//          for (StackTraceTable::FieldOffsetList::const_iterator it = sTable->fieldOffsets.begin();
//              it != sTable->fieldOffsets.end(); ++it) {
//            errs() << " " << *it;
//          }
//          errs() << "\n";
//        }
//        if (!sTable->traceMethods.empty()) {
//          errs() << "GCStrategy:   Trace method descriptor: " << "\n";
//        }
//      }
        }
    }

    // Finally, generate the safe point map.
    outStream.AddBlankLine();
    MCSymbol * gcSafepointSymbol = AP.GetExternalSymbolSymbol("GC_safepoint_map");
    outStream.EmitSymbolAttribute(gcSafepointSymbol, MCSA_Global);
    outStream.EmitLabel(gcSafepointSymbol);
    outStream.EmitIntValue(safePoints.size(), pointerSize, 0);
    for (SafePointList::const_iterator it = safePoints.begin(); it != safePoints.end(); ++it) {
        outStream.EmitSymbolValue(it->first, pointerSize, 0);
        outStream.EmitSymbolValue(it->second, pointerSize, 0);
    }
}
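The emitted map is a pointer-sized count followed by (safe point label, trace table label) pairs. A hedged sketch of what a matching runtime-side lookup could look like, assuming the layout emitted above; the reader-side names are hypothetical:

#include <cstdint>

// Hypothetical runtime-side view of the emitted table: a pointer-sized count
// followed by (safe point address, trace table address) pairs.
struct SafePointEntry {
  const void *ReturnAddress;  // corresponds to sp->Label
  const void *TraceTable;     // corresponds to sTable->traceTableLabel
};

extern "C" const uintptr_t GC_safepoint_map[];

// Linear lookup of a return address in the safe point map; returns null if
// the address is not a safe point.
inline const SafePointEntry *findSafePoint(const void *RetAddr) {
  uintptr_t Count = GC_safepoint_map[0];
  const auto *Entries =
      reinterpret_cast<const SafePointEntry *>(&GC_safepoint_map[1]);
  for (uintptr_t I = 0; I != Count; ++I)
    if (Entries[I].ReturnAddress == RetAddr)
      return &Entries[I];
  return nullptr;
}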
Example #16
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
  const TerminatorInst *TI = BB->getTerminator();
  if (TI->getNumSuccessors() == 1)
    return false;
  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
    return false;

  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
  if (!WeightsNode)
    return false;

  // Check that the number of successors is manageable.
  assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");

  // Ensure there are weights for all of the successors. Note that the first
  // operand to the metadata node is a name, not a weight.
  if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
    return false;

  // Build up the final weights that will be used in a temporary buffer.
  // Compute the sum of all weights to later decide whether they need to
  // be scaled to fit in 32 bits.
  uint64_t WeightSum = 0;
  SmallVector<uint32_t, 2> Weights;
  Weights.reserve(TI->getNumSuccessors());
  for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
    ConstantInt *Weight =
        mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
    if (!Weight)
      return false;
    assert(Weight->getValue().getActiveBits() <= 32 &&
           "Too many bits for uint32_t");
    Weights.push_back(Weight->getZExtValue());
    WeightSum += Weights.back();
  }
  assert(Weights.size() == TI->getNumSuccessors() && "Checked above");

  // If the sum of weights does not fit in 32 bits, scale every weight down
  // accordingly.
  uint64_t ScalingFactor =
      (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;

  WeightSum = 0;
  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
    Weights[i] /= ScalingFactor;
    WeightSum += Weights[i];
  }

  if (WeightSum == 0) {
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      setEdgeProbability(BB, i, {1, e});
  } else {
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
  }

  assert(WeightSum <= UINT32_MAX &&
         "Expected weights to scale down to 32 bits");

  return true;
}
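The scaling step guarantees the rescaled sum fits in 32 bits: dividing every weight by Sum / UINT32_MAX + 1 bounds the new sum by UINT32_MAX. A standalone sketch of just that arithmetic:

#include <cstdint>
#include <vector>

// If the 64-bit sum of weights exceeds UINT32_MAX, divide every weight by
// Sum / UINT32_MAX + 1; the rescaled sum is then guaranteed to fit in 32
// bits. Returns the new sum.
static uint64_t scaleWeightsTo32Bits(std::vector<uint32_t> &Weights) {
  uint64_t Sum = 0;
  for (uint32_t W : Weights)
    Sum += W;
  uint64_t Factor = (Sum > UINT32_MAX) ? Sum / UINT32_MAX + 1 : 1;
  Sum = 0;
  for (uint32_t &W : Weights) {
    W /= Factor;  // note: small weights may round down to 0, as in the pass
    Sum += W;
  }
  return Sum;  // <= UINT32_MAX by construction
}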
Example #17
bool HexagonGenExtract::convert(Instruction *In) {
  using namespace PatternMatch;
  Value *BF = 0;
  ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
  BasicBlock *BB = In->getParent();
  LLVMContext &Ctx = BB->getContext();
  bool LogicalSR;

  // (and (shl (lshr x, #sr), #sl), #m)
  LogicalSR = true;
  bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                               m_ConstantInt(CSL)),
                         m_ConstantInt(CM)));

  if (!Match) {
    // (and (shl (ashr x, #sr), #sl), #m)
    LogicalSR = false;
    Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)),
                      m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (shl x, #sl), #m)
    LogicalSR = true;
    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
                      m_ConstantInt(CM)));
    if (Match && NoSR0)
      return false;
  }
  if (!Match) {
    // (and (lshr x, #sr), #m)
    LogicalSR = true;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (ashr x, #sr), #m)
    LogicalSR = false;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    CM = 0;
    // (shl (lshr x, #sr), #sl)
    LogicalSR = true;
    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match) {
    CM = 0;
    // (shl (ashr x, #sr), #sl)
    LogicalSR = false;
    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match)
    return false;

  Type *Ty = BF->getType();
  if (!Ty->isIntegerTy())
    return false;
  unsigned BW = Ty->getPrimitiveSizeInBits();
  if (BW != 32 && BW != 64)
    return false;

  uint32_t SR = CSR->getZExtValue();
  uint32_t SL = CSL->getZExtValue();

  if (!CM) {
    // If there was no 'and', and the shift left did not remove all potential
    // sign bits created by the shift right, then extractu cannot reproduce
    // this value.
    if (!LogicalSR && (SR > SL))
      return false;
    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
    CM = ConstantInt::get(Ctx, A);
  }

  // CM is the shifted-left mask. Shift it back right to remove the zero
  // bits on least-significant positions.
  APInt M = CM->getValue().lshr(SL);
  uint32_t T = M.countTrailingOnes();

  // During the shifts some of the bits will be lost. Calculate how many
  // of the original value will remain after shift right and then left.
  uint32_t U = BW - std::max(SL, SR);
  // The width of the extracted field is the minimum of the original bits
  // that remain after the shifts and the number of contiguous 1s in the mask.
  uint32_t W = std::min(U, T);
  if (W == 0)
    return false;

  // Check if the extracted bits are contained within the mask that it is
  // and-ed with. The extract operation will copy these bits, and so the
  // mask cannot have any holes in it that would clear any of the bits of the
  // extracted field.
  if (!LogicalSR) {
    // If the shift right was arithmetic, it could have included some 1 bits.
    // It is still ok to generate extract, but only if the mask eliminates
    // those bits (i.e. M does not have any bits set beyond U).
    APInt C = APInt::getHighBitsSet(BW, BW-U);
    if (M.intersects(C) || !APIntOps::isMask(W, M))
      return false;
  } else {
    // Check if M starts with a contiguous sequence of W times 1 bits. Get
    // the low U bits of M (which eliminates the 0 bits shifted in on the
    // left), and check if the result is APInt's "mask":
    if (!APIntOps::isMask(W, M.getLoBits(U)))
      return false;
  }

  IRBuilder<> IRB(In);
  Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
                                   : Intrinsic::hexagon_S2_extractup;
  Module *Mod = BB->getParent()->getParent();
  Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
  Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
  if (SL != 0)
    NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
  In->replaceAllUsesWith(NewIn);
  return true;
}
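The field width W is the number of contiguous low 1s in the shifted-back mask, clamped by the bits that survive both shifts. A standalone 64-bit sketch of that computation for the logical shift-right case (C++20 for std::countr_one):

#include <algorithm>
#include <bit>
#include <cstdint>

// Undo the left shift on the mask, count its contiguous low 1s, and clamp
// by the original bits surviving both shifts. A result of 0 means the
// extract instruction cannot reproduce this value.
static uint32_t extractWidth(uint64_t M, uint32_t SR, uint32_t SL,
                             uint32_t BW = 64) {
  uint64_t MLo = M >> SL;             // the mask, shifted back right
  uint32_t T = std::countr_one(MLo);  // trailing ones in the mask
  uint32_t U = BW - std::max(SL, SR); // surviving source bits
  return std::min(U, T);
}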
Example #18
bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
  const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  Value *Cond = BI->getCondition();
  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
  if (!CI)
    return false;

  Value *RHS = CI->getOperand(1);
  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
  if (!CV)
    return false;

  // If the LHS is the result of AND'ing a value with a single bit bitmask,
  // we don't have information about probabilities.
  if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
    if (LHS->getOpcode() == Instruction::And)
      if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
        if (AndRHS->getUniqueInteger().isPowerOf2())
          return false;

  bool isProb;
  if (CV->isZero()) {
    switch (CI->getPredicate()) {
    case CmpInst::ICMP_EQ:
      // X == 0   ->  Unlikely
      isProb = false;
      break;
    case CmpInst::ICMP_NE:
      // X != 0   ->  Likely
      isProb = true;
      break;
    case CmpInst::ICMP_SLT:
      // X < 0   ->  Unlikely
      isProb = false;
      break;
    case CmpInst::ICMP_SGT:
      // X > 0   ->  Likely
      isProb = true;
      break;
    default:
      return false;
    }
  } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
    // InstCombine canonicalizes X <= 0 into X < 1.
    // X <= 0   ->  Unlikely
    isProb = false;
  } else if (CV->isAllOnesValue()) {
    switch (CI->getPredicate()) {
    case CmpInst::ICMP_EQ:
      // X == -1  ->  Unlikely
      isProb = false;
      break;
    case CmpInst::ICMP_NE:
      // X != -1  ->  Likely
      isProb = true;
      break;
    case CmpInst::ICMP_SGT:
      // InstCombine canonicalizes X >= 0 into X > -1.
      // X >= 0   ->  Likely
      isProb = true;
      break;
    default:
      return false;
    }
  } else {
    return false;
  }

  unsigned TakenIdx = 0, NonTakenIdx = 1;

  if (!isProb)
    std::swap(TakenIdx, NonTakenIdx);

  BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
                              ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
  setEdgeProbability(BB, TakenIdx, TakenProb);
  setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
  return true;
}
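The verdict is then turned into a fixed taken/non-taken ratio. A standalone sketch of that last step; the 20/12 split mirrors LLVM's ZH_TAKEN_WEIGHT/ZH_NONTAKEN_WEIGHT, but treat the exact values as an assumption of this sketch:

#include <cstdint>
#include <utility>

struct Prob { uint32_t Num, Denom; };

// Map the likely/unlikely verdict onto (successor 0, successor 1)
// probabilities; the taken index is swapped when the branch is unlikely,
// just as TakenIdx/NonTakenIdx are swapped above.
static std::pair<Prob, Prob> zeroHeuristicProbs(bool IsProb) {
  const uint32_t Taken = 20, NonTaken = 12;  // assumed ZH_* weights
  Prob TakenProb{Taken, Taken + NonTaken};
  Prob Compl{NonTaken, Taken + NonTaken};
  return IsProb ? std::make_pair(TakenProb, Compl)
                : std::make_pair(Compl, TakenProb);
}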
Example #19
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  Value *cpyDest = cpy->getDest();
  Value *cpySrc = cpy->getSource();
  CallSite CS(C);

  // We need to be able to reason about the size of the memcpy, so we require
  // that it be a constant.
  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
  if (!cpyLength)
    return false;

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD->getTypeAllocSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User *UI = srcUseList.pop_back_val();

    if (isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
      if (G->hasAllZeroIndices())
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          srcUseList.push_back(*I);
      else
        return false;
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == cpyDest->getType())
        CS.setArgument(i, cpyDest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, 
                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.removeInstruction(C);

  // Remove the memcpy
  MD.removeInstruction(cpy);
  cpy->eraseFromParent();
  ++NumMemCpyInstr;

  return true;
}
Example #20
// switchConvert - Convert the switch statement into a binary lookup of
// the case values. The function recursively builds this tree.
// LowerBound and UpperBound are used to keep track of the bounds for Val
// that have already been checked by a block emitted by one of the previous
// calls to switchConvert in the call stack.
BasicBlock *
LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
                           ConstantInt *UpperBound, Value *Val,
                           BasicBlock *Predecessor, BasicBlock *OrigBlock,
                           BasicBlock *Default,
                           const std::vector<IntRange> &UnreachableRanges) {
  unsigned Size = End - Begin;

  if (Size == 1) {
    // Check if the Case Range is perfectly squeezed in between
    // already checked Upper and Lower bounds. If it is then we can avoid
    // emitting the code that checks if the value actually falls in the range
    // because the bounds already tell us so.
    if (Begin->Low == LowerBound && Begin->High == UpperBound) {
      unsigned NumMergedCases = 0;
      if (LowerBound && UpperBound)
        NumMergedCases =
            UpperBound->getSExtValue() - LowerBound->getSExtValue();
      fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
      return Begin->BB;
    }
    return newLeafBlock(*Begin, Val, OrigBlock, Default);
  }

  unsigned Mid = Size / 2;
  std::vector<CaseRange> LHS(Begin, Begin + Mid);
  DEBUG(dbgs() << "LHS: " << LHS << "\n");
  std::vector<CaseRange> RHS(Begin + Mid, End);
  DEBUG(dbgs() << "RHS: " << RHS << "\n");

  CaseRange &Pivot = *(Begin + Mid);
  DEBUG(dbgs() << "Pivot ==> "
               << Pivot.Low->getValue()
               << " -" << Pivot.High->getValue() << "\n");

  // NewLowerBound here should never be the integer minimal value.
  // This is because it is computed from a case range that is never
  // the smallest, so there is always a case range that has at least
  // a smaller value.
  ConstantInt *NewLowerBound = Pivot.Low;

  // Because NewLowerBound is never the smallest representable integer
  // it is safe here to subtract one.
  ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
                                                NewLowerBound->getValue() - 1);

  if (!UnreachableRanges.empty()) {
    // Check if the gap between LHS's highest and NewLowerBound is unreachable.
    int64_t GapLow = LHS.back().High->getSExtValue() + 1;
    int64_t GapHigh = NewLowerBound->getSExtValue() - 1;
    IntRange Gap = { GapLow, GapHigh };
    if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges))
      NewUpperBound = LHS.back().High;
  }

  DEBUG(dbgs() << "LHS Bounds ==> ";
        if (LowerBound) {
          dbgs() << LowerBound->getSExtValue();
        } else {
          dbgs() << "NONE";
        }
        dbgs() << " - " << NewUpperBound->getSExtValue() << "\n";
        dbgs() << "RHS Bounds ==> ";
        dbgs() << NewLowerBound->getSExtValue() << " - ";
        if (UpperBound) {
          dbgs() << UpperBound->getSExtValue() << "\n";
        } else {
          dbgs() << "NONE\n";
        });
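The listing above is cut off before the recursive calls, but the shape is a median split over the sorted case list. A standalone sketch of that divide-and-conquer structure, counting the comparison depth instead of emitting blocks:

#include <cstddef>

// Split the sorted case list at the median and recurse; each emitted block
// performs one comparison, so the lookup depth is logarithmic in the number
// of cases.
static unsigned lookupDepth(size_t NumCases) {
  if (NumCases <= 1)
    return 1;                 // a single leaf block
  size_t Mid = NumCases / 2;  // the pivot, as in switchConvert
  unsigned L = lookupDepth(Mid);
  unsigned R = lookupDepth(NumCases - Mid);
  return 1 + (L > R ? L : R);
}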
Example #21
int InductionDescriptor::getConsecutiveDirection() const {
  ConstantInt *ConstStep = getConstIntStepValue();
  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
    return ConstStep->getSExtValue();
  return 0;
}
Example #22
/// Evaluate all instructions in block BB, returning true if successful, false
/// if we can't evaluate it.  NextBB returns the next BB that control flows into,
/// or null upon return.
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
                              BasicBlock *&NextBB) {
  // This is the main evaluation loop.
  while (1) {
    Constant *InstResult = nullptr;

    DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");

    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
      if (!SI->isSimple()) {
        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
        return false;  // no volatile/atomic accesses.
      }
      Constant *Ptr = getVal(SI->getOperand(1));
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
        DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
        Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
        DEBUG(dbgs() << "; To: " << *Ptr << "\n");
      }
      if (!isSimpleEnoughPointerToCommit(Ptr)) {
        // If this is too complex for us to commit, reject it.
        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
        return false;
      }

      Constant *Val = getVal(SI->getOperand(0));

      // If this might be too difficult for the backend to handle (e.g. the addr
      // of one global variable divided by another) then we can't commit it.
      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
        DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
              << "\n");
        return false;
      }

      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
        if (CE->getOpcode() == Instruction::BitCast) {
          DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
          // If we're evaluating a store through a bitcast, then we need
          // to pull the bitcast off the pointer type and push it onto the
          // stored value.
          Ptr = CE->getOperand(0);

          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();

          // In order to push the bitcast onto the stored value, a bitcast
          // from NewTy to Val's type must be legal.  If it's not, we can try
          // introspecting NewTy to find a legal conversion.
          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
            // If NewTy is a struct, we can convert the pointer to the struct
            // into a pointer to its first member.
            // FIXME: This could be extended to support arrays as well.
            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
              NewTy = STy->getTypeAtIndex(0U);

              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
              Constant * const IdxList[] = {IdxZero, IdxZero};

              Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
              if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
                Ptr = ConstantFoldConstantExpression(CE, DL, TLI);

            // If we can't improve the situation by introspecting NewTy,
            // we have to give up.
            } else {
              DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
                    "evaluate.\n");
              return false;
            }
          }

          // If we found compatible types, go ahead and push the bitcast
          // onto the stored value.
          Val = ConstantExpr::getBitCast(Val, NewTy);

          DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
        }
      }

      MutatedMemory[Ptr] = Val;
    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
      InstResult = ConstantExpr::get(BO->getOpcode(),
                                     getVal(BO->getOperand(0)),
                                     getVal(BO->getOperand(1)));
      DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
            << "\n");
    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
                                            getVal(CI->getOperand(0)),
                                            getVal(CI->getOperand(1)));
      DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
            << "\n");
    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
      InstResult = ConstantExpr::getCast(CI->getOpcode(),
                                         getVal(CI->getOperand(0)),
                                         CI->getType());
      DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
            << "\n");
    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
      InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
                                           getVal(SI->getOperand(1)),
                                           getVal(SI->getOperand(2)));
      DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
            << "\n");
    } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
      InstResult = ConstantExpr::getExtractValue(
          getVal(EVI->getAggregateOperand()), EVI->getIndices());
      DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
                   << "\n");
    } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
      InstResult = ConstantExpr::getInsertValue(
          getVal(IVI->getAggregateOperand()),
          getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
      DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
                   << "\n");
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
      Constant *P = getVal(GEP->getOperand(0));
      SmallVector<Constant*, 8> GEPOps;
      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
           i != e; ++i)
        GEPOps.push_back(getVal(*i));
      InstResult =
          ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
                                         cast<GEPOperator>(GEP)->isInBounds());
      DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
            << "\n");
    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {

      if (!LI->isSimple()) {
        DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
        return false;  // no volatile/atomic accesses.
      }

      Constant *Ptr = getVal(LI->getOperand(0));
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
        Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
        DEBUG(dbgs() << "Found a constant pointer expression, constant "
              "folding: " << *Ptr << "\n");
      }
      InstResult = ComputeLoadResult(Ptr);
      if (!InstResult) {
        DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
              "\n");
        return false; // Could not evaluate load.
      }

      DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
      if (AI->isArrayAllocation()) {
        DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
        return false;  // Cannot handle array allocs.
      }
      Type *Ty = AI->getAllocatedType();
      AllocaTmps.push_back(
          make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
                                      UndefValue::get(Ty), AI->getName()));
      InstResult = AllocaTmps.back().get();
      DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
    } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
      CallSite CS(&*CurInst);

      // Debug info can safely be ignored here.
      if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
        DEBUG(dbgs() << "Ignoring debug info.\n");
        ++CurInst;
        continue;
      }

      // Cannot handle inline asm.
      if (isa<InlineAsm>(CS.getCalledValue())) {
        DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
        return false;
      }

      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
        if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
          if (MSI->isVolatile()) {
            DEBUG(dbgs() << "Can not optimize a volatile memset " <<
                  "intrinsic.\n");
            return false;
          }
          Constant *Ptr = getVal(MSI->getDest());
          Constant *Val = getVal(MSI->getValue());
          Constant *DestVal = ComputeLoadResult(getVal(Ptr));
          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
            // This memset is a no-op.
            DEBUG(dbgs() << "Ignoring no-op memset.\n");
            ++CurInst;
            continue;
          }
        }

        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
          ++CurInst;
          continue;
        }

        if (II->getIntrinsicID() == Intrinsic::invariant_start) {
          // We don't insert an entry into Values, as it doesn't have a
          // meaningful return value.
          if (!II->use_empty()) {
            DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
            return false;
          }
          ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
          Value *PtrArg = getVal(II->getArgOperand(1));
          Value *Ptr = PtrArg->stripPointerCasts();
          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
            Type *ElemTy = GV->getValueType();
            if (!Size->isAllOnesValue() &&
                Size->getValue().getLimitedValue() >=
                    DL.getTypeStoreSize(ElemTy)) {
              Invariants.insert(GV);
              DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
                    << "\n");
            } else {
              DEBUG(dbgs() << "Found a global var, but can not treat it as an "
                    "invariant.\n");
            }
          }
          // Continue even if we do nothing.
          ++CurInst;
          continue;
        } else if (II->getIntrinsicID() == Intrinsic::assume) {
          DEBUG(dbgs() << "Skipping assume intrinsic.\n");
          ++CurInst;
          continue;
        }

        DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
        return false;
      }

      // Resolve function pointers.
      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
      if (!Callee || Callee->mayBeOverridden()) {
        DEBUG(dbgs() << "Can not resolve function pointer.\n");
        return false;  // Cannot resolve.
      }

      SmallVector<Constant*, 8> Formals;
      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
        Formals.push_back(getVal(*i));

      if (Callee->isDeclaration()) {
        // If this is a function we can constant fold, do it.
        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
          InstResult = C;
          DEBUG(dbgs() << "Constant folded function call. Result: " <<
                *InstResult << "\n");
        } else {
          DEBUG(dbgs() << "Can not constant fold function call.\n");
          return false;
        }
      } else {
        if (Callee->getFunctionType()->isVarArg()) {
          DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
          return false;
        }

        Constant *RetVal = nullptr;
        // Execute the call, if successful, use the return value.
        ValueStack.emplace_back();
        if (!EvaluateFunction(Callee, RetVal, Formals)) {
          DEBUG(dbgs() << "Failed to evaluate function.\n");
          return false;
        }
        ValueStack.pop_back();
        InstResult = RetVal;

        if (InstResult) {
          DEBUG(dbgs() << "Successfully evaluated function. Result: "
                       << *InstResult << "\n\n");
        } else {
          DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
        }
      }
    } else if (isa<TerminatorInst>(CurInst)) {
      DEBUG(dbgs() << "Found a terminator instruction.\n");

      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
        if (BI->isUnconditional()) {
          NextBB = BI->getSuccessor(0);
        } else {
          ConstantInt *Cond =
            dyn_cast<ConstantInt>(getVal(BI->getCondition()));
          if (!Cond) return false;  // Cannot determine.

          NextBB = BI->getSuccessor(!Cond->getZExtValue());
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
        ConstantInt *Val =
          dyn_cast<ConstantInt>(getVal(SI->getCondition()));
        if (!Val) return false;  // Cannot determine.
        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
      } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
        Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
        if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
          NextBB = BA->getBasicBlock();
        else
          return false;  // Cannot determine.
      } else if (isa<ReturnInst>(CurInst)) {
        NextBB = nullptr;
      } else {
        // invoke, unwind, resume, unreachable.
        DEBUG(dbgs() << "Can not handle terminator.");
        return false;  // Cannot handle this terminator.
      }

      // We succeeded at evaluating this block!
      DEBUG(dbgs() << "Successfully evaluated block.\n");
      return true;
    } else {
      // Did not know how to evaluate this!
      DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
            "\n");
      return false;
    }

    if (!CurInst->use_empty()) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
        InstResult = ConstantFoldConstantExpression(CE, DL, TLI);

      setVal(&*CurInst, InstResult);
    }

    // If we just processed an invoke, we finished evaluating the block.
    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
      NextBB = II->getNormalDest();
      DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
      return true;
    }

    // Advance program counter.
    ++CurInst;
  }
}
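The terminator handling reduces to picking a successor index once the condition is a known constant; note the !Cond->getZExtValue() flip above, since successor 0 is the true edge. A standalone sketch of that dispatch:

#include <cstdint>
#include <optional>

// Unconditional branches always take successor 0; conditional branches pick
// successor 0 on true and successor 1 on false, hence the negation in
// getSuccessor(!Cond->getZExtValue()). An unknown condition aborts.
static std::optional<unsigned>
foldBranchTarget(bool IsConditional, std::optional<uint64_t> Cond) {
  if (!IsConditional)
    return 0u;
  if (!Cond)
    return std::nullopt;  // cannot determine; evaluation fails
  return *Cond ? 0u : 1u;
}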
Example #23
/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
/// both be) and we have an icmp instruction with zero, and we have an 'and'
/// with the non-constant value and a power of two we can turn the select
/// into a shift on the result of the 'and'.
static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
                                ConstantInt *FalseVal,
                                InstCombiner::BuilderTy *Builder) {
  const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
  if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
    return nullptr;

  if (!match(IC->getOperand(1), m_Zero()))
    return nullptr;

  ConstantInt *AndRHS;
  Value *LHS = IC->getOperand(0);
  if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
    return nullptr;

  // If both select arms are non-zero see if we have a select of the form
  // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
  // for 'x ? 2^n : 0' and fix the thing up at the end.
  ConstantInt *Offset = nullptr;
  if (!TrueVal->isZero() && !FalseVal->isZero()) {
    if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
      Offset = FalseVal;
    else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
      Offset = TrueVal;
    else
      return nullptr;

    // Adjust TrueVal and FalseVal to the offset.
    TrueVal = ConstantInt::get(Builder->getContext(),
                               TrueVal->getValue() - Offset->getValue());
    FalseVal = ConstantInt::get(Builder->getContext(),
                                FalseVal->getValue() - Offset->getValue());
  }

  // Make sure the mask in the 'and' and one of the select arms is a power of 2.
  if (!AndRHS->getValue().isPowerOf2() ||
      (!TrueVal->getValue().isPowerOf2() &&
       !FalseVal->getValue().isPowerOf2()))
    return nullptr;

  // Determine which shift is needed to transform result of the 'and' into the
  // desired result.
  ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
  unsigned ValZeros = ValC->getValue().logBase2();
  unsigned AndZeros = AndRHS->getValue().logBase2();

  // If types don't match we can still convert the select by introducing a zext
  // or a trunc of the 'and'. The trunc case requires that all of the truncated
  // bits are zero, we can figure that out by looking at the 'and' mask.
  if (AndZeros >= ValC->getBitWidth())
    return nullptr;

  Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
  if (ValZeros > AndZeros)
    V = Builder->CreateShl(V, ValZeros - AndZeros);
  else if (ValZeros < AndZeros)
    V = Builder->CreateLShr(V, AndZeros - ValZeros);

  // Okay, now we know that everything is set up, we just don't know whether we
  // have an icmp_ne or icmp_eq and whether the true or false val is the zero.
  bool ShouldNotVal = !TrueVal->isZero();
  ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
  if (ShouldNotVal)
    V = Builder->CreateXor(V, ValC);

  // Apply an offset if needed.
  if (Offset)
    V = Builder->CreateAdd(V, Offset);
  return V;
}
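The net effect is that a select of two constants collapses to a shift (and possibly a xor/add) of the masked value. A standalone check of the shift arithmetic for one concrete shape; the values are illustrative, not taken from the pass:

#include <cassert>
#include <cstdint>

// Shape: (x & 8) == 0 ? 0 : 4. Here AndZeros = log2(8) = 3 and
// ValZeros = log2(4) = 2, so the select becomes (x & 8) >> (3 - 2).
static uint32_t selectViaShift(uint32_t X) {
  return (X & 8u) >> 1;
}

int main() {
  for (uint32_t X = 0; X != 64; ++X) {
    uint32_t Expected = (X & 8u) ? 4u : 0u;
    assert(selectViaShift(X) == Expected);
  }
  return 0;
}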
Example #24
/// Perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile()) return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    MD->removeInstruction(M);
    M->eraseFromParent();
    return false;
  }

  // If copying from a constant, try to turn the memcpy into a memset.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
        IRBuilder<> Builder(M);
        Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
                             M->getAlignment(), false);
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  MemDepResult DepInfo = MD->getDependency(M);

  // Try to turn a partially redundant memset + memcpy into
  // memcpy + smaller memset.  We don't need the memcpy size for this.
  if (DepInfo.isClobber())
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
      if (processMemSetMemCpyDependence(M, MDep))
        return true;

  // The optimizations after this point require the memcpy size.
  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
  if (!CopySize) return false;

  // There are four possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started its
  //      lifetime copies undefined data, and we can therefore eliminate the
  //      memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  if (DepInfo.isClobber()) {
    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
      if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
                               CopySize->getZExtValue(), M->getAlignment(),
                               C)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        return true;
      }
    }
  }

  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
      SrcLoc, true, M->getIterator(), M->getParent());

  if (SrcDepInfo.isClobber()) {
    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
      return processMemCpyMemCpyDependence(M, MDep);
  } else if (SrcDepInfo.isDef()) {
    Instruction *I = SrcDepInfo.getInst();
    bool hasUndefContents = false;

    if (isa<AllocaInst>(I)) {
      hasUndefContents = true;
    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
            hasUndefContents = true;
    }

    if (hasUndefContents) {
      MD->removeInstruction(M);
      M->eraseFromParent();
      ++NumMemCpyInstr;
      return true;
    }
  }

  if (SrcDepInfo.isClobber())
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
      if (performMemCpyToMemSetOptzn(M, MDep)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  return false;
}
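Transform (a) in plain C: when memcpy B copies from the destination of memcpy A and nothing touched that buffer in between, B can read straight from A's source, leaving A dead for later DSE. A runnable equivalence check:

#include <cassert>
#include <cstring>

int main() {
  char X[8] = "abcdefg", Y[8], Z[8], Z2[8];
  std::memcpy(Y, X, 8);   // memcpy A: X -> Y
  std::memcpy(Z, Y, 8);   // memcpy B: Y -> Z ...
  std::memcpy(Z2, X, 8);  // ... rewritten to copy from X directly
  assert(std::memcmp(Z, Z2, 8) == 0);
  return 0;
}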
Example #25
// Set outgoing edges alive dependent on the terminator instruction SI.
// If the terminator is an Invoke instruction, the call has already been run.
// Return true if anything changed.
bool IntegrationAttempt::checkBlockOutgoingEdges(ShadowInstruction* SI) {

  // TOCHECK: I think this only returns false if the block ends with an Unreachable inst?
  switch(SI->invar->I->getOpcode()) {
  case Instruction::Br:
  case Instruction::Switch:
  case Instruction::Invoke:
  case Instruction::Resume:
    break;
  default:
    return false;
  }

  if(inst_is<InvokeInst>(SI)) {

    InlineAttempt* IA = getInlineAttempt(SI);

    bool changed = false;

    // !localStore indicates the invoke instruction doesn't return normally
    if(SI->parent->localStore) {

      changed |= !SI->parent->succsAlive[0];
      SI->parent->succsAlive[0] = true;

    }      

    // I mark the exceptional edge reachable here if the call is disabled, even though
    // we might have proved it isn't feasible. This could be improved by converting the
    // invoke into a call in the final program.
    if((!IA) || (!IA->isEnabled()) || IA->mayUnwind) {

      changed |= !SI->parent->succsAlive[1];
      SI->parent->succsAlive[1] = true;

    }

    return changed;
    
  }
  else if(inst_is<ResumeInst>(SI)) {

    bool changed = !mayUnwind;
    mayUnwind = true;
    return changed;

  }
  else if(BranchInst* BI = dyn_cast_inst<BranchInst>(SI)) {

    if(BI->isUnconditional()) {
      bool changed = !SI->parent->succsAlive[0];
      SI->parent->succsAlive[0] = true;
      return changed;
    }

  }

  // Both switches and conditional branches use operand 0 for the condition.
  ShadowValue Condition = SI->getOperand(0);
      
  bool changed = false;
  
  ConstantInt* ConstCondition = dyn_cast_or_null<ConstantInt>(getConstReplacement(Condition));
  if(!ConstCondition) {

    if(Condition.t == SHADOWVAL_INST || Condition.t == SHADOWVAL_ARG) {

      // Switch statements can operate on a ptrtoint operand, of which only ptrtoint(null) is useful:
      if(ImprovedValSetSingle* IVS = dyn_cast_or_null<ImprovedValSetSingle>(getIVSRef(Condition))) {

        if(IVS->onlyContainsNulls()) {

          ConstCondition = cast<ConstantInt>(Constant::getNullValue(SI->invar->I->getOperand(0)->getType()));

        }

      }

    }

  }

  if(!ConstCondition) {
    
    std::pair<ValSetType, ImprovedVal> PathVal;

    if(tryGetPathValue(Condition, SI->parent, PathVal))
      ConstCondition = dyn_cast_val<ConstantInt>(PathVal.second.V);

  }

  TerminatorInst* TI = cast_inst<TerminatorInst>(SI);
  const unsigned NumSucc = TI->getNumSuccessors();

  if(ConstCondition) {

    BasicBlock* takenTarget = 0;

    if(BranchInst* BI = dyn_cast_inst<BranchInst>(SI)) {
      // This ought to be a boolean.
      if(ConstCondition->isZero())
	takenTarget = BI->getSuccessor(1);
      else
	takenTarget = BI->getSuccessor(0);
    }
    else {
      SwitchInst* SwI = cast_inst<SwitchInst>(SI);
      SwitchInst::CaseIt targetidx = SwI->findCaseValue(ConstCondition);
      takenTarget = targetidx.getCaseSuccessor();
    }
    if(takenTarget) {
      // We know where the instruction is going -- remove this block as a predecessor for its other targets.
      LPDEBUG("Branch or switch instruction given known target: " << takenTarget->getName() << "\n");

      return setEdgeAlive(TI, SI->parent, takenTarget);

    }
    
    // Else fall through to set all alive.

  }

  SwitchInst* Switch;
  ImprovedValSetSingle* IVS;

  if((Switch = dyn_cast_inst<SwitchInst>(SI)) && 
     (IVS = dyn_cast<ImprovedValSetSingle>(getIVSRef(Condition))) && 
     IVS->SetType == ValSetTypeScalar && 
     !IVS->Values.empty()) {

    // A set of values feeding a switch. Set each corresponding edge alive.

    bool changed = false;

    for (unsigned i = 0, ilim = IVS->Values.size(); i != ilim; ++i) {
      
      SwitchInst::CaseIt targetit = Switch->findCaseValue(cast<ConstantInt>(getConstReplacement(IVS->Values[i].V)));
      BasicBlock* target = targetit.getCaseSuccessor();
      changed |= setEdgeAlive(TI, SI->parent, target);

    }

    return changed;

  }

  // Condition unknown -- set all successors alive.
  for (unsigned I = 0; I != NumSucc; ++I) {
    
    // Mark outgoing edge alive
    if(!SI->parent->succsAlive[I])
      changed = true;
    SI->parent->succsAlive[I] = true;
    
  }

  return changed;

}
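The bookkeeping at the end reduces to marking successor edges alive, either just the known target or all of them. A standalone sketch of that update, returning whether anything changed:

#include <optional>
#include <vector>

// With a known branch or switch target only that edge is marked alive;
// otherwise every successor is. Returns true if any edge changed.
static bool markSuccsAlive(std::vector<bool> &SuccsAlive,
                           std::optional<unsigned> TakenIdx) {
  bool Changed = false;
  unsigned E = static_cast<unsigned>(SuccsAlive.size());
  for (unsigned I = 0; I != E; ++I) {
    if (TakenIdx && *TakenIdx != I)
      continue;  // known target: leave the other edges untouched
    if (!SuccsAlive[I]) {
      SuccsAlive[I] = true;
      Changed = true;
    }
  }
  return Changed;
}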
Example #26
bool Instruction::isSafeToSpeculativelyExecute() const {
  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
    if (Constant *C = dyn_cast<Constant>(getOperand(i)))
      if (C->canTrap())
        return false;

  switch (getOpcode()) {
  default:
    return true;
  case UDiv:
  case URem: {
    // x / y is undefined if y == 0, but calculations like x / 3 are safe.
    ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
    return Op && !Op->isNullValue();
  }
  case SDiv:
  case SRem: {
    // x / y is undefined if y == 0, and might be undefined if y == -1,
    // but calculations like x / 3 are safe.
    ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
    return Op && !Op->isNullValue() && !Op->isAllOnesValue();
  }
  case Load: {
    const LoadInst *LI = cast<LoadInst>(this);
    if (LI->isVolatile())
      return false;
    return LI->getPointerOperand()->isDereferenceablePointer();
  }
  case Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(this)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::gla_abs:
      case Intrinsic::gla_addCarry:
      case Intrinsic::gla_all:
      case Intrinsic::gla_any:
      case Intrinsic::gla_bitCount:
      case Intrinsic::gla_bitFieldInsert:
      case Intrinsic::gla_bitReverse:
      // case Intrinsic::gla_discard:
      // case Intrinsic::gla_discardConditional:
      case Intrinsic::gla_fAbs:
      case Intrinsic::gla_fAcos:
      case Intrinsic::gla_fAcosh:
      case Intrinsic::gla_fAsin:
      case Intrinsic::gla_fAsinh:
      case Intrinsic::gla_fAtan:
      case Intrinsic::gla_fAtan2:
      case Intrinsic::gla_fAtanh:
      case Intrinsic::gla_fCeiling:
      case Intrinsic::gla_fClamp:
      case Intrinsic::gla_fCos:
      case Intrinsic::gla_fCosh:
      case Intrinsic::gla_fCross:
      case Intrinsic::gla_fDFdx:
      case Intrinsic::gla_fDFdy:
      case Intrinsic::gla_fDegrees:
      case Intrinsic::gla_fDistance:
      case Intrinsic::gla_fDot2:
      case Intrinsic::gla_fDot3:
      case Intrinsic::gla_fDot4:
      case Intrinsic::gla_fExp:
      case Intrinsic::gla_fExp10:
      case Intrinsic::gla_fExp2:
      case Intrinsic::gla_fFaceForward:
      case Intrinsic::gla_fFilterWidth:
      case Intrinsic::gla_fFixedTransform:
      case Intrinsic::gla_fFloatBitsToInt:
      case Intrinsic::gla_fFloor:
      case Intrinsic::gla_fFma:
      case Intrinsic::gla_fFraction:
      case Intrinsic::gla_fFrexp:
      case Intrinsic::gla_fIntBitsTofloat:
      case Intrinsic::gla_fInverseSqrt:
      case Intrinsic::gla_fIsInf:
      case Intrinsic::gla_fIsNan:
      case Intrinsic::gla_fLdexp:
      case Intrinsic::gla_fLength:
      case Intrinsic::gla_fLit:
      case Intrinsic::gla_fLog:
      case Intrinsic::gla_fLog10:
      case Intrinsic::gla_fLog2:
      case Intrinsic::gla_fMax:
      case Intrinsic::gla_fMin:
      case Intrinsic::gla_fMix:
      case Intrinsic::gla_fModF:
      case Intrinsic::gla_fMultiInsert:
      case Intrinsic::gla_fNormalize:
      case Intrinsic::gla_fNormalize3D:
      case Intrinsic::gla_fPackDouble2x32:
      case Intrinsic::gla_fPackSnorm4x8:
      case Intrinsic::gla_fPackUnorm2x16:
      case Intrinsic::gla_fPackUnorm4x8:
      case Intrinsic::gla_fPow:
      case Intrinsic::gla_fPowi:
      case Intrinsic::gla_fQueryTextureLod:
      case Intrinsic::gla_fRTextureSample1:
      case Intrinsic::gla_fRTextureSample2:
      case Intrinsic::gla_fRTextureSample3:
      case Intrinsic::gla_fRTextureSample4:
      case Intrinsic::gla_fRTextureSampleLodRefZ1:
      case Intrinsic::gla_fRTextureSampleLodRefZ2:
      case Intrinsic::gla_fRTextureSampleLodRefZ3:
      case Intrinsic::gla_fRTextureSampleLodRefZ4:
      case Intrinsic::gla_fRTextureSampleLodRefZOffset1:
      case Intrinsic::gla_fRTextureSampleLodRefZOffset2:
      case Intrinsic::gla_fRTextureSampleLodRefZOffset3:
      case Intrinsic::gla_fRTextureSampleLodRefZOffset4:
      case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad1:
      case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad2:
      case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad3:
      case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad4:
      case Intrinsic::gla_fRadians:
      case Intrinsic::gla_fReadData:
      case Intrinsic::gla_fReadInterpolant:
      case Intrinsic::gla_fReadInterpolantOffset:
      case Intrinsic::gla_fReflect:
      case Intrinsic::gla_fRefract:
      case Intrinsic::gla_fRoundEven:
      case Intrinsic::gla_fRoundFast:
      case Intrinsic::gla_fRoundZero:
      case Intrinsic::gla_fSign:
      case Intrinsic::gla_fSin:
      case Intrinsic::gla_fSinh:
      case Intrinsic::gla_fSmoothStep:
      case Intrinsic::gla_fSqrt:
      case Intrinsic::gla_fStep:
      case Intrinsic::gla_fSwizzle:
      case Intrinsic::gla_fTan:
      case Intrinsic::gla_fTanh:
      case Intrinsic::gla_fTexelFetchOffset:
      case Intrinsic::gla_fTexelGather:
      case Intrinsic::gla_fTexelGatherOffset:
      case Intrinsic::gla_fTexelGatherOffsets:
      case Intrinsic::gla_fTextureSample:
      case Intrinsic::gla_fTextureSampleLodRefZ:
      case Intrinsic::gla_fTextureSampleLodRefZOffset:
      case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
      case Intrinsic::gla_fUnpackDouble2x32:
      case Intrinsic::gla_fUnpackSnorm4x8:
      case Intrinsic::gla_fUnpackUnorm2x16:
      case Intrinsic::gla_fUnpackUnorm4x8:
      // case Intrinsic::gla_fWriteData:
      // case Intrinsic::gla_fWriteInterpolant:
      case Intrinsic::gla_findLSB:
      case Intrinsic::gla_getInterpolant:
      case Intrinsic::gla_multiInsert:
      case Intrinsic::gla_not:
      case Intrinsic::gla_queryTextureSize:
      case Intrinsic::gla_rTextureSample1:
      case Intrinsic::gla_rTextureSample2:
      case Intrinsic::gla_rTextureSample3:
      case Intrinsic::gla_rTextureSample4:
      case Intrinsic::gla_rTextureSampleLodRefZ1:
      case Intrinsic::gla_rTextureSampleLodRefZ2:
      case Intrinsic::gla_rTextureSampleLodRefZ3:
      case Intrinsic::gla_rTextureSampleLodRefZ4:
      case Intrinsic::gla_rTextureSampleLodRefZOffset1:
      case Intrinsic::gla_rTextureSampleLodRefZOffset2:
      case Intrinsic::gla_rTextureSampleLodRefZOffset3:
      case Intrinsic::gla_rTextureSampleLodRefZOffset4:
      case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad1:
      case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad2:
      case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad3:
      case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad4:
      case Intrinsic::gla_readData:
      case Intrinsic::gla_sBitFieldExtract:
      case Intrinsic::gla_sClamp:
      case Intrinsic::gla_sFindMSB:
      case Intrinsic::gla_sFma:
      case Intrinsic::gla_sMax:
      case Intrinsic::gla_sMin:
      case Intrinsic::gla_smulExtended:
      case Intrinsic::gla_subBorrow:
      case Intrinsic::gla_swizzle:
      case Intrinsic::gla_texelFetchOffset:
      case Intrinsic::gla_texelGather:
      case Intrinsic::gla_texelGatherOffset:
      case Intrinsic::gla_texelGatherOffsets:
      case Intrinsic::gla_textureSample:
      case Intrinsic::gla_textureSampleLodRefZ:
      case Intrinsic::gla_textureSampleLodRefZOffset:
      case Intrinsic::gla_textureSampleLodRefZOffsetGrad:
      case Intrinsic::gla_uBitFieldExtract:
      case Intrinsic::gla_uClamp:
      case Intrinsic::gla_uFindMSB:
      case Intrinsic::gla_uFma:
      case Intrinsic::gla_uMax:
      case Intrinsic::gla_uMin:
      case Intrinsic::gla_umulExtended:
      // case Intrinsic::gla_writeData:
          return true;
      default:
          break;
      }
    }


    return false; // The called function could have undefined behavior or
                  // side-effects.
                  // FIXME: We should special-case some intrinsics (bswap,
                  // overflow-checking arithmetic, etc.)
  case VAArg:
  case Alloca:
  case Invoke:
  case PHI:
  case Store:
  case Ret:
  case Br:
  case IndirectBr:
  case Switch:
  case Unwind:
  case Unreachable:
    return false; // Misc instructions which have effects
  }
}
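The tail above whitelists LunarGLASS gla_* intrinsics as side-effect free; the commented-out write intrinsics, which do have side effects, are deliberately excluded. Below is a minimal sketch of how such a predicate is typically consumed. The wrapper name isSafeToEliminate and the sweep itself are assumptions, not part of this source:

// Hypothetical consumer (assumes the usual LLVM headers, "using namespace
// llvm", and that the predicate above is wrapped as isSafeToEliminate):
// erase unused instructions that are known to be free of side effects.
static bool sweepDeadInstructions(Function &F) {
  bool Changed = false;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
      Instruction *Inst = &*I++;       // advance before a possible erase
      if (Inst->use_empty() && isSafeToEliminate(Inst)) {
        Inst->eraseFromParent();
        Changed = true;
      }
    }
  }
  return Changed;
}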
Example #27
0
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
/// fold the operation into the addressing mode.  If so, update the addressing
/// mode and return true, otherwise return false without modifying AddrMode.
bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5) return false;
  
  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return MatchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
        // Don't touch identity bitcasts.  These were probably put here by LSR,
        // and we don't want to mess around with them.  Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
        MatchAddr(AddrInst->getOperand(0), Depth+1))
      return true;
    
    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    
    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
        MatchAddr(AddrInst->getOperand(1), Depth+1))
      return true;
    
    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  //case Instruction::Or:
  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
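    // For a shift the scale becomes a power of two, e.g. "X << 3" is matched
    // as X scaled by 8.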
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) return false;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1LL << Scale;
    
    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP.  We can handle it if it contains constant offsets and
    // at most one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;
    
    int64_t ConstantOffset = 0;
    const TargetData *TD = TLI.getTargetData();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;
          
          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }
    
    // A common case is for the GEP to only do a constant offset.  In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;

    // Match the base operand of the GEP.
    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
      // If it couldn't be matched, just stuff the value in a register.
      if (AddrMode.HasBaseReg) {
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // Match the remaining variable portion of the GEP.
    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      // If it couldn't be matched, try stuffing the base into a register
      // instead of matching it, and retrying the match of the scale.
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
      if (AddrMode.HasBaseReg)
        return false;
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
      AddrMode.BaseOffs += ConstantOffset;
      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
                            VariableScale, Depth)) {
        // If even that didn't work, bail.
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
    }

    return true;
  }
  }
  return false;
}
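The GetElementPtr case above is the core of the matcher: every constant index is folded into a single displacement, and at most one variable index is tolerated, its scale being the allocation size of the indexed type. Below is a standalone sketch of that scan in plain C++, with byte sizes hard-coded as stand-ins for StructLayout and getTypeAllocSize; none of these names come from the source:

#include <cstdint>
#include <cstdio>
#include <vector>

// Each GEP index either contributes a known byte offset (a constant index)
// or is a variable index whose scale is the indexed element size in bytes.
struct IndexInfo {
  bool IsConstant;
  int64_t ByteOffset;  // used when IsConstant
  uint64_t Scale;      // used when !IsConstant
};

// Mirrors the scan above: fold all constant indices into one displacement
// and allow at most one variable index.
static bool scanGEPIndices(const std::vector<IndexInfo> &Indices,
                           int64_t &ConstantOffset, int &VariableOperand,
                           uint64_t &VariableScale) {
  ConstantOffset = 0;
  VariableOperand = -1;
  for (size_t i = 0, e = Indices.size(); i != e; ++i) {
    if (Indices[i].IsConstant) {
      ConstantOffset += Indices[i].ByteOffset;
    } else if (Indices[i].Scale) {   // scales of zero don't do anything
      if (VariableOperand != -1)
        return false;                // a second variable index: give up
      VariableOperand = int(i);
      VariableScale = Indices[i].Scale;
    }
  }
  return true;
}

int main() {
  // Models: gep {i32, [8 x i32]}* %p, i32 0, i32 1, i32 %k
  // index 0 contributes 0 bytes, index 1 selects field 1 at byte offset 4,
  // index 2 is the variable %k scaled by sizeof(i32) == 4.
  std::vector<IndexInfo> Indices = {
      {true, 0, 0}, {true, 4, 0}, {false, 0, 4}};
  int64_t Off; int VarOp; uint64_t Scale = 0;
  if (scanGEPIndices(Indices, Off, VarOp, Scale))
    std::printf("offset=%lld variable-operand=%d scale=%llu\n",
                (long long)Off, VarOp, (unsigned long long)Scale);
  return 0;
}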
Example #28
0
int BranchProbabilities::CheckIntegerHeuristic()
{
    // Heuristic fails if the last instruction is not a conditional branch
    BranchInst *BI = dyn_cast<BranchInst>(_TI);
    if ((!BI) || (BI->isUnconditional()))
        return -1;

    // All integer comparisons are done with the icmp instruction
    ICmpInst *icmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!icmp)
        return -1;

    Value *v[2];
    v[0] = icmp->getOperand(0);
    v[1] = icmp->getOperand(1);

    // If neither is a constant, nothing to do
    if (!isa<ConstantInt>(v[0]) && !isa<ConstantInt>(v[1]))
        return -1;

    // If we're dealing with something other than ints, nothing to do
    if (!isa<IntegerType>(v[0]->getType()))
        return -1;

    // Get comparison
    ICmpInst::Predicate pred = icmp->getPredicate();

    // Eq and Not Eq are easy cases
    if (pred == ICmpInst::ICMP_EQ)
        return 1;
    else if (pred == ICmpInst::ICMP_NE)
        return 0;

    ConstantInt *CI = dyn_cast<ConstantInt>(v[1]);
    // If the right side isn't a constant, swap the predicate so we can
    // pretend the constant is on the right.
    if (!CI)
    {
        pred = icmp->getSwappedPredicate();
        CI = cast<ConstantInt>(v[0]);
    }

    // Choose the appropriate branch depending on the const val and predicate
    if (CI->isZero())
    {
        switch (pred)
        {
        case ICmpInst::ICMP_UGE:
            // An unsigned comparison >= 0 is always true, so such a branch
            // should already have been folded away; make no prediction.
            assert(0 && "UGE zero always returns true");
            return -1;
        case ICmpInst::ICMP_ULT:
            // Likewise, an unsigned comparison < 0 is always false.
            assert(0 && "ULT zero always returns false");
            return -1;
        case ICmpInst::ICMP_UGT:
        case ICmpInst::ICMP_SGT:
        case ICmpInst::ICMP_SGE:
            return 0;
        case ICmpInst::ICMP_ULE:
        case ICmpInst::ICMP_SLT:
        case ICmpInst::ICMP_SLE:
            return 1;
        default:
            return -1;
        }
    }
    else if (CI->isOne())
    {
        switch (pred)
        {
        case ICmpInst::ICMP_UGE:
        case ICmpInst::ICMP_SGE:
            return 0;
        case ICmpInst::ICMP_ULT:
        case ICmpInst::ICMP_SLT:
            return 1;
        default:
            return -1;
        }
    }
    else if (CI->isAllOnesValue())
    {
        switch (pred)
        {
        case ICmpInst::ICMP_SGT:
            return 0;
        case ICmpInst::ICMP_SLE:
            return 1;
        default:
            return -1;
        }
    }

    return -1;
}
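This is the Ball and Larus style integer heuristic: equality tests and comparisons such as x < 0 are predicted to fail, so the return value appears to be the index of the successor predicted likely (0 for the true edge, 1 for the false edge), with -1 meaning the heuristic does not apply. A hypothetical caller, in which every name except CheckIntegerHeuristic and _TI is assumed, might read:

void BranchProbabilities::applyIntegerHeuristic()
{
    int Likely = CheckIntegerHeuristic();
    if (Likely < 0)
        return;                         // heuristic does not apply
    BranchInst *BI = cast<BranchInst>(_TI);
    markAsLikely(BI->getSuccessor(Likely));  // assumed bookkeeping helper
}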
Example #29
0
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
                                     Value *cpyDest, Value *cpySrc,
                                     uint64_t cpyLen, unsigned cpyAlign,
                                     CallInst *C) {
    // The general transformation to keep in mind is
    //
    //   call @func(..., src, ...)
    //   memcpy(dest, src, ...)
    //
    // ->
    //
    //   memcpy(dest, src, ...)
    //   call @func(..., dest, ...)
    //
    // Since moving the memcpy is technically awkward, we additionally check that
    // src only holds uninitialized values at the moment of the call, meaning that
    // the memcpy can be discarded rather than moved.

    // Deliberately get the source and destination with bitcasts stripped away,
    // because we'll need to do type comparisons based on the underlying type.
    CallSite CS(C);

    // Require that src be an alloca.  This simplifies the reasoning considerably.
    AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
    if (!srcAlloca)
        return false;

    // Check that all of src is copied to dest.
    if (!DL) return false;

    ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
    if (!srcArraySize)
        return false;

    uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
                       srcArraySize->getZExtValue();

    if (cpyLen < srcSize)
        return false;

    // Check that accessing the first srcSize bytes of dest will not cause a
    // trap.  Otherwise the transform is invalid since it might cause a trap
    // to occur earlier than it otherwise would.
    if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
        // The destination is an alloca.  Check it is larger than srcSize.
        ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
        if (!destArraySize)
            return false;

        uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
                            destArraySize->getZExtValue();

        if (destSize < srcSize)
            return false;
    } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
        // If the destination is an sret parameter then only accesses that are
        // outside of the returned struct type can trap.
        if (!A->hasStructRetAttr())
            return false;

        Type *StructTy = cast<PointerType>(A->getType())->getElementType();
        if (!StructTy->isSized()) {
            // The call may never return and hence the copy-instruction may never
            // be executed, and therefore it's not safe to say "the destination
            // has at least <cpyLen> bytes, as implied by the copy-instruction".
            return false;
        }

        uint64_t destSize = DL->getTypeAllocSize(StructTy);
        if (destSize < srcSize)
            return false;
    } else {
        return false;
    }

    // Check that dest points to memory that is at least as aligned as src.
    unsigned srcAlign = srcAlloca->getAlignment();
    if (!srcAlign)
        srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
    bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
    // If dest is not aligned enough and we can't increase its alignment then
    // bail out.
    if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
        return false;

    // Check that src is not accessed except via the call and the memcpy.  This
    // guarantees that it holds only undefined values when passed in (so the final
    // memcpy can be dropped), that it is not read or written between the call and
    // the memcpy, and that writing beyond the end of it is undefined.
    SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
                                     srcAlloca->user_end());
    while (!srcUseList.empty()) {
        User *U = srcUseList.pop_back_val();

        if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
            for (User *UU : U->users())
                srcUseList.push_back(UU);
        } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
            if (G->hasAllZeroIndices())
                for (User *UU : U->users())
                    srcUseList.push_back(UU);
            else
                return false;
        } else if (U != C && U != cpy) {
            return false;
        }
    }

    // Check that src isn't captured by the called function since the
    // transformation can cause aliasing issues in that case.
    for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
        if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
            return false;

    // Since we're changing the parameter to the callsite, we need to make sure
    // that what would be the new parameter dominates the callsite.
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
        if (!DT.dominates(cpyDestInst, C))
            return false;

    // In addition to knowing that the call does not access src in some
    // unexpected manner, for example via a global, which we deduce from
    // the use analysis, we also need to know that it does not sneakily
    // access dest.  We rely on AA to figure this out for us.
    AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
    AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
    // If necessary, perform additional analysis.
    if (MR != AliasAnalysis::NoModRef)
        MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
    if (MR != AliasAnalysis::NoModRef)
        return false;

    // All the checks have passed, so do the transformation.
    bool changedArgument = false;
    for (unsigned i = 0; i < CS.arg_size(); ++i)
        if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
            Value *Dest = cpySrc->getType() == cpyDest->getType() ?  cpyDest
                          : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                  cpyDest->getName(), C);
            changedArgument = true;
            if (CS.getArgument(i)->getType() == Dest->getType())
                CS.setArgument(i, Dest);
            else
                CS.setArgument(i, CastInst::CreatePointerCast(Dest,
                               CS.getArgument(i)->getType(), Dest->getName(), C));
        }

    if (!changedArgument)
        return false;

    // If the destination wasn't sufficiently aligned then increase its alignment.
    if (!isDestSufficientlyAligned) {
        assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
        cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
    }

    // Drop any cached information about the call, because we may have changed
    // its dependence information by changing its parameter.
    MD->removeInstruction(C);

    // Remove the memcpy.
    MD->removeInstruction(cpy);
    ++NumMemCpyInstr;

    return true;
}
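A C-level illustration of the shapes described in the comment at the top of performCallSlotOptzn may help; this is a sketch of the intent, not code from the source:

#include <cstring>

// Illustrative only; "compute" stands for the call C whose result lands in
// the alloca, and S for its sized return buffer.
struct S { int Data[16]; };
void compute(S *Out);                  // assumed to write all of *Out

void before(S *Dest) {
  S Tmp;                               // srcAlloca: uninitialized at the call
  compute(&Tmp);                       // the call the memcpy depends on
  std::memcpy(Dest, &Tmp, sizeof(S));  // the memcpy being optimized
}

// After the transformation the call writes straight into Dest, so Tmp and
// the memcpy become dead and can be removed.
void after(S *Dest) {
  compute(Dest);
}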
Example #30
0
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following, matching
  // one fadd/shufflevector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 
  unsigned NumVecElemsRemain = NumVecElems;
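  // Each iteration consumes one reduction level, halving the number of lanes
  // still being combined; log2(NumVecElems) levels in total.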
  while (NumVecElemsRemain != 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the
    // same value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}
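The matcher verifies masks from the extractelement upward, so the narrowest shuffle mask is checked first, with MaskStart doubling at each level. A standalone plain C++ sketch (no LLVM) that prints the masks expected for a 4-wide reduction:

#include <cstdio>
#include <vector>

// Standalone illustration: print the shuffle masks that
// matchVectorSplittingReduction expects for a 4-wide reduction, walking
// bottom-up from the extractelement. -1 stands for an undef lane.
int main() {
  unsigned NumVecElems = 4;                // must be a power of two
  unsigned MaskStart = 1;
  unsigned Remain = NumVecElems;
  std::vector<int> Mask(NumVecElems);
  while (Remain != 1) {
    for (unsigned j = 0; j != NumVecElems; ++j)
      Mask[j] = j < MaskStart ? int(MaskStart + j) : -1;
    std::printf("level mask:");
    for (int M : Mask)
      std::printf(" %d", M);
    std::printf("\n");
    Remain /= 2;
    MaskStart *= 2;
  }
  // Prints:
  //   level mask: 1 -1 -1 -1     (matches %rdx.shuf7 in the comment above)
  //   level mask: 2 3 -1 -1      (matches %rdx.shuf in the comment above)
  return 0;
}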