void X86InterleavedAccessGroup::decompose(
    Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
    SmallVectorImpl<Instruction *> &DecomposedVectors) {

  assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
         "Expected Load or Shuffle");

  Type *VecTy = VecInst->getType();
  (void)VecTy;
  assert(VecTy->isVectorTy() &&
         DL.getTypeSizeInBits(VecTy) >=
             DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
         "Invalid Inst-size!!!");

  if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
    Value *Op0 = SVI->getOperand(0);
    Value *Op1 = SVI->getOperand(1);

    // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type.
    for (unsigned i = 0; i < NumSubVectors; ++i)
      DecomposedVectors.push_back(
          cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
              Op0, Op1,
              createSequentialMask(Builder, Indices[i],
                                   SubVecTy->getVectorNumElements(), 0))));
    return;
  }

  // Decompose the load instruction.
  LoadInst *LI = cast<LoadInst>(VecInst);
  Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace());
  Value *VecBasePtr;
  unsigned int NumLoads = NumSubVectors;
  // In the case of stride 3 with a vector of 32 elements, load the information
  // in the following way:
  // [0,1,...,VF/2-1, VF/2+VF, VF/2+VF+1,..., 2VF-1]
  if (DL.getTypeSizeInBits(VecTy) == 768) {
    Type *VecTran =
        VectorType::get(Type::getInt8Ty(LI->getContext()), 16)->getPointerTo();
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTran);
    NumLoads = NumSubVectors * 2;
  } else
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
  // Generate N loads of T type.
  for (unsigned i = 0; i < NumLoads; i++) {
    // TODO: Support inbounds GEP.
    Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i));
    Instruction *NewLoad =
        Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment());
    DecomposedVectors.push_back(NewLoad);
  }
}
bool IRTranslator::translateLoad(const LoadInst &LI) {
  assert(LI.isSimple() && "only simple loads are supported at the moment");

  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Res = getOrCreateVReg(LI);
  unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
  LLT VTy{*LI.getType()}, PTy{*LI.getPointerOperand()->getType()};

  MIRBuilder.buildLoad(
      VTy, PTy, Res, Addr,
      *MF.getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
                               MachineMemOperand::MOLoad,
                               VTy.getSizeInBits() / 8, getMemOpAlignment(LI)));
  return true;
}
void InstrumentMemoryAccesses::visitLoadInst(LoadInst &LI) {
  // Instrument a load instruction with a load check.
  Value *AccessSize = ConstantInt::get(SizeTy,
                                       TD->getTypeStoreSize(LI.getType()));
  instrument(LI.getPointerOperand(), AccessSize, LoadCheckFunction, LI);
  ++LoadsInstrumented;
}
Example #4
void Interpreter::visitLoadInst(LoadInst &I) {
  ExecutionContext &SF = ECStack.back();
  GenericValue SRC = getOperandValue(I.getPointerOperand(), SF);
  GenericValue *Ptr = (GenericValue*)GVTOP(SRC);
  GenericValue Result = LoadValueFromMemory(Ptr, I.getType());
  SetValue(&I, Result, SF);
}
Example #5
// Instrumenting some of the accesses may be proven redundant.
// Currently handled:
//  - read-before-write (within same BB, no calls between)
//
// We do not handle some of the patterns that should not survive
// after the classic compiler optimizations.
// E.g. two reads from the same temp should be eliminated by CSE,
// two writes should be eliminated by DSE, etc.
//
// 'Local' is a vector of insns within the same BB (no calls between).
// 'All' is a vector of insns that will be instrumented.
void ThreadSanitizer::chooseInstructionsToInstrument(
    SmallVectorImpl<Instruction*> &Local,
    SmallVectorImpl<Instruction*> &All) {
  SmallSet<Value*, 8> WriteTargets;
  // Iterate from the end.
  for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
       E = Local.rend(); It != E; ++It) {
    Instruction *I = *It;
    if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
      WriteTargets.insert(Store->getPointerOperand());
    } else {
      LoadInst *Load = cast<LoadInst>(I);
      Value *Addr = Load->getPointerOperand();
      if (WriteTargets.count(Addr)) {
        // We will write to this temp, so no reason to analyze the read.
        NumOmittedReadsBeforeWrite++;
        continue;
      }
      if (addrPointsToConstantData(Addr)) {
        // Addr points to some constant data -- it can not race with any writes.
        continue;
      }
    }
    All.push_back(I);
  }
  Local.clear();
}
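A minimal source-level sketch of the read-before-write case this heuristic omits (hypothetical code, not taken from the pass): the load and the later store to the same global sit in one basic block with no call in between, so the backwards walk records the store's address in WriteTargets and leaves the earlier read uninstrumented.

int Counter;

void bump() {
  int Old = Counter; // read of Counter: omitted as read-before-write
  Counter = Old + 1; // write of Counter: still instrumented
}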
Example #6
void GraphBuilder::visitLoadInst(LoadInst &LI) {
  //
  // Create a DSNode for the pointer dereferenced by the load.  If the DSNode
  // is NULL, do nothing more (this can occur if the load is loading from a
  // NULL pointer constant; bugpoint can generate such code).
  //
  DSNodeHandle Ptr = getValueDest(LI.getPointerOperand());
  if (Ptr.isNull()) return; // Load from null

  // Mark that the node is read from...
  Ptr.getNode()->setReadMarker();

  // Ensure a type record exists...
  Ptr.getNode()->growSizeForType(LI.getType(), Ptr.getOffset());

  if (isa<PointerType>(LI.getType()))
    setDestTo(LI, getLink(Ptr));

  // If the only use of the loaded value is to be stored right back, ignore it
  // for type inference purposes.
  if (TypeInferenceOptimize)
    if (LI.hasOneUse())
      if (StoreInst *SI = dyn_cast<StoreInst>(*(LI.use_begin())))
        if (SI->getOperand(0) == &LI) {
          ++NumIgnoredInst;
          return;
        }
  Ptr.getNode()->mergeTypeInfo(LI.getType(), Ptr.getOffset());
}
void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) {
    unsigned AS = LI.getPointerAddressSpace();
    if (!isSpecialAS(AS))
        return;
    Value *Replacement = LiftPointer(LI.getPointerOperand(), LI.getType(), &LI);
    if (!Replacement)
        return;
    LI.setOperand(LoadInst::getPointerOperandIndex(), Replacement);
}
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;

  if (Instruction *PtrI = dyn_cast<Instruction>(Ptr))
    setUniformMetadata(PtrI);
}
Example #9
LoadInst* getLoopViLoad(Loop *L)
{
  AllocaInst* viAlloc = getLoopVi(L);
  //Instruction* firstHeaderInstr = L->getHeader()->begin();
  Instruction* firstHeaderInstr = L->getHeader()->getFirstNonPHI();

  // If such a load already exists, return it instead of creating a new one.
  LoadInst* firstHeaderInstrLoad = dyn_cast<LoadInst>(firstHeaderInstr);
  if (firstHeaderInstrLoad && firstHeaderInstrLoad->getPointerOperand() == viAlloc)
    return firstHeaderInstrLoad;
  return new LoadInst(viAlloc, viAlloc->getName() + ".load", firstHeaderInstr);
}
Example #10
void smtit::performTest1() {

  for (Module::iterator FI = Mod->begin(), FE = Mod->end(); FI != FE; ++FI) {
    Function *Func = &*FI;
    // DEBUG(errs() << *Func << "\n");
    for (Function::iterator BI = Func->begin(), BE = Func->end(); BI != BE;
         ++BI) {
      BasicBlock *BB = &*BI;
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        Instruction *BBI = &*I;
        //if (true == isa<StoreInst>(BBI)) {
        if (isa<LoadInst>(BBI)) {
          LoadInst *li = cast<LoadInst>(BBI);
          Value *ptrOp = li->getPointerOperand();
          DEBUG(errs() << *li << "\t Result Name: " << li->getName() << "\t Pointer Name: " << ptrOp->getName() << "\n");

          // DEBUG(errs() << "\tStore Instruction: " << *BBI << " \n");
          // DEBUG(errs() << "\t\tPointerType: " << isLLVMPAPtrTy(SI->getType())
          // << " \n");
          // Instruction* V = cast<Instruction>(SI->getOperand(1));
          // DEBUG(errs() << "\tOperand : " << *V << " \n");
          // DEBUG(errs() << "\t\tPointerType: " << isLLVMPAPtrTy(V->getType())
          // << " \n");
        } else if (isa<GetElementPtrInst>(BBI)) {
          GetElementPtrInst *gep = cast<GetElementPtrInst>(BBI);
          DEBUG(errs() << *gep << "\t Result Name: " << gep->getName() << "\n");
          // DEBUG(errs() << "\tInstruction: " << *BBI << " \n");
          // DEBUG(errs() << "\t\tPointerType: " <<
          // isLLVMPAPtrTy(BBI->getType()) << " \n");
        }

        // For def-use chains: All the uses of the definition
        //DEBUG(errs() << *BBI << "\n");
        /*
        for (User *U : BBI->users()) {
          if (Instruction *Inst = dyn_cast<Instruction>(U)) {
            DEBUG(errs()<< " " <<  *Inst << "\n");
          }
        }

        for (Value::user_iterator i = BBI->user_begin(), e = BBI->user_end();
              i != e; ++i) {
          if (Instruction *user_inst = dyn_cast<Instruction>(*i)) {
            DEBUG(errs()<< " " << *user_inst << "\n");
          }
        }
        */
      }
    }
  }
}
Example #11
bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
    Instruction *Addr) const {
  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
  Function *Func = (*Blocks.begin())->getParent();
  for (BasicBlock &BB : *Func) {
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {

      if (isa<DbgInfoIntrinsic>(II))
        continue;

      unsigned Opcode = II.getOpcode();
      Value *MemAddr = nullptr;
      switch (Opcode) {
      case Instruction::Store:
      case Instruction::Load: {
        if (Opcode == Instruction::Store) {
          StoreInst *SI = cast<StoreInst>(&II);
          MemAddr = SI->getPointerOperand();
        } else {
          LoadInst *LI = cast<LoadInst>(&II);
          MemAddr = LI->getPointerOperand();
        }
        // A global variable cannot be aliased with locals.
        if (isa<Constant>(MemAddr))
          break;
        Value *Base = MemAddr->stripInBoundsConstantOffsets();
        if (!isa<AllocaInst>(Base) || Base == AI)
          return false;
        break;
      }
      default: {
        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
        if (IntrInst) {
          if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
              IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
            break;
          return false;
        }
        // Treat all the other cases conservatively if it has side effects.
        if (II.mayHaveSideEffects())
          return false;
      }
      }
    }
  }

  return true;
}
void GCInvariantVerifier::visitLoadInst(LoadInst &LI) {
    Type *Ty = LI.getType();
    if (Ty->isPointerTy()) {
        unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
        Check(AS != AddressSpace::CalleeRooted &&
              AS != AddressSpace::Derived,
              "Illegal load of gc relevant value", &LI);
    }
    Ty = LI.getPointerOperand()->getType();
    if (Ty->isPointerTy()) {
        unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
        Check(AS != AddressSpace::CalleeRooted,
              "Illegal store of callee rooted value", &LI);
    }
}
void ArrayIndexChecker::visitLoadInst(LoadInst& I) {
  DEBUG(dbgs() << "ArrayIndexChecker: visiting load " << I << "\n");

  visitValue(*I.getPointerOperand());

  if (I.getType()->isPointerTy()) {
    auto pos = std::find(ptr_value_vec_.begin(), ptr_value_vec_.end(), &I);
    assert(pos != ptr_value_vec_.end());
    index_t varIdx = pos - ptr_value_vec_.begin();

    assert(idx2addr_.find(varIdx) != idx2addr_.end());
    if (addr2version_[idx2addr_[varIdx]] != 0)
      throw ArrayIndexIsNotConstant;
  }
  DEBUG(dbgs() << "ArrayIndexChecker: visited load\n");
}
// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources.  We adjust the unroller
// MaxCount preference below to attempt to ensure unrolling doesn't create too
// many strided loads.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    // FIXME? We could make this more precise by looking at the CFG and
    // e.g. not counting loads in each side of an if-then-else diamond.
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;

        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;

        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;

        // FIXME? We could take pairing of unrolled load copies into account
        // by looking at the AddRec, but we would probably have to limit this
        // to loops with no stores or other memory optimization barriers.
        ++StridedLoads;
        // We've seen enough strided loads that seeing more won't make a
        // difference.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
  if (StridedLoads) {
    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
    LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                      << UP.MaxCount << '\n');
  }
}
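A quick worked example with the constants above (hypothetical numbers): if countStridedLoads returns 3, then UP.MaxCount = 1 << Log2_32(7 / 3) = 1 << Log2_32(2) = 2, so unrolling by 2 produces at most 6 strided loads and stays within the MaxStridedLoads budget of 7.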
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}
/// \brief Check whether a loop instruction is safe for loop versioning.
/// Returns true if the instruction is safe, false otherwise.
/// The checks are:
/// 1) All loads and stores in the loop body are non-atomic and non-volatile.
/// 2) Function calls are safe, i.e. they do not access memory.
/// 3) The loop body has no instruction that may throw.
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
  assert(I != nullptr && "Null instruction found!");
  // Check function call safety
  if (isa<CallInst>(I) && !AA->doesNotAccessMemory(CallSite(I))) {
    DEBUG(dbgs() << "    Unsafe call site found.\n");
    return false;
  }
  // Avoid loops with the possibility of throwing.
  if (I->mayThrow()) {
    DEBUG(dbgs() << "    May throw instruction found in loop body\n");
    return false;
  }
  // If current instruction is load instructions
  // make sure it's a simple load (non atomic & non volatile)
  if (I->mayReadFromMemory()) {
    LoadInst *Ld = dyn_cast<LoadInst>(I);
    if (!Ld || !Ld->isSimple()) {
      DEBUG(dbgs() << "    Found a non-simple load.\n");
      return false;
    }
    LoadAndStoreCounter++;
    collectStridedAccess(Ld);
    Value *Ptr = Ld->getPointerOperand();
    // Check loop invariant.
    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
      InvariantCounter++;
  }
  // If current instruction is store instruction
  // make sure it's a simple store (non atomic & non volatile)
  else if (I->mayWriteToMemory()) {
    StoreInst *St = dyn_cast<StoreInst>(I);
    if (!St || !St->isSimple()) {
      DEBUG(dbgs() << "    Found a non-simple store.\n");
      return false;
    }
    LoadAndStoreCounter++;
    collectStridedAccess(St);
    Value *Ptr = St->getPointerOperand();
    // Check loop invariant.
    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
      InvariantCounter++;

    IsReadOnlyLoop = false;
  }
  return true;
}
Example #17
void TracingNoGiri::visitLoadInst(LoadInst &LI) {
  instrumentLock(&LI);

  // Get the ID of the load instruction.
  Value *LoadID = ConstantInt::get(Int32Type, lsNumPass->getID(&LI));
  // Cast the pointer into a void pointer type.
  Value *Pointer = LI.getPointerOperand();
  Pointer = castTo(Pointer, VoidPtrType, Pointer->getName(), &LI);
  // Get the size of the loaded data.
  uint64_t size = TD->getTypeStoreSize(LI.getType());
  Value *LoadSize = ConstantInt::get(Int64Type, size);
  // Create the call to the run-time to record the load instruction.
  std::vector<Value *> args = make_vector<Value *>(LoadID, Pointer, LoadSize, 0);
  CallInst::Create(RecordLoad, args, "", &LI);

  instrumentUnlock(&LI);
  ++NumLoads; // Update statistics
}
Example #18
// Instrumenting some of the accesses may be proven redundant.
// Currently handled:
//  - read-before-write (within same BB, no calls between)
//  - not captured variables
//
// We do not handle some of the patterns that should not survive
// after the classic compiler optimizations.
// E.g. two reads from the same temp should be eliminated by CSE,
// two writes should be eliminated by DSE, etc.
//
// 'Local' is a vector of insns within the same BB (no calls between).
// 'All' is a vector of insns that will be instrumented.
void ThreadSanitizer::chooseInstructionsToInstrument(
    SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All,
    const DataLayout &DL) {
  SmallSet<Value*, 8> WriteTargets;
  // Iterate from the end.
  for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
       E = Local.rend(); It != E; ++It) {
    Instruction *I = *It;
    if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
      Value *Addr = Store->getPointerOperand();
      if (!shouldInstrumentReadWriteFromAddress(Addr))
        continue;
      WriteTargets.insert(Addr);
    } else {
      LoadInst *Load = cast<LoadInst>(I);
      Value *Addr = Load->getPointerOperand();
      if (!shouldInstrumentReadWriteFromAddress(Addr))
        continue;
      if (WriteTargets.count(Addr)) {
        // We will write to this temp, so no reason to analyze the read.
        NumOmittedReadsBeforeWrite++;
        continue;
      }
      if (addrPointsToConstantData(Addr)) {
        // Addr points to some constant data -- it can not race with any writes.
        continue;
      }
    }
    Value *Addr = isa<StoreInst>(*I)
        ? cast<StoreInst>(I)->getPointerOperand()
        : cast<LoadInst>(I)->getPointerOperand();
    if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
        !PointerMayBeCaptured(Addr, true, true)) {
      // The variable is addressable but not captured, so it cannot be
      // referenced from a different thread and participate in a data race
      // (see llvm/Analysis/CaptureTracking.h for details).
      NumOmittedNonCaptured++;
      continue;
    }
    All.push_back(I);
  }
  Local.clear();
}
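A hypothetical source-level sketch of the extra "not captured" exemption in this version (the function and names are invented for illustration): both accesses below go through GEPs of a local array, GetUnderlyingObject strips the GEP back to the alloca, and since the address of Buf never leaves the function PointerMayBeCaptured returns false, so neither access needs instrumentation.

int useLocalOnly(int Seed) {
  int Buf[4] = {0, 0, 0, 0};  // an addressable local (an alloca in IR)
  Buf[Seed & 3] = Seed;       // store to the non-captured alloca: skipped
  return Buf[(Seed + 1) & 3]; // load from the non-captured alloca: skipped
}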
PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
  PointerOffsetPair POP;
  POP.Pointer = LI.getPointerOperand();
  POP.Offset = 0;
  while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
      unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType());
      APInt Offset(BitWidth, 0);
      if (GEP->accumulateConstantOffset(*DL, Offset))
        POP.Offset += Offset.getZExtValue();
      else
        // Can't handle GEPs with variable indices.
        return POP;
      POP.Pointer = GEP->getPointerOperand();
    } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer))
      POP.Pointer = BC->getOperand(0);
  }
  return POP;
}
// Returns true if this instruction can be turned into (part of) a vector
// access; anything not handled below is rejected.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(Inst);
    // Currently we only handle the case where the pointer operand is a GEP.
    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
  case Instruction::Store: {
    // Must be the stored pointer operand, not a stored value; and since the IR
    // should be in canonical form, the user should be a GEP.
    StoreInst *SI = cast<StoreInst>(Inst);
    return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) &&
           !SI->isVolatile();
  }
  default:
    return false;
  }
}
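A hedged sketch of the shapes this predicate accepts, assuming the caller is walking the users of an alloca it wants to turn into a vector (as the surrounding pass does): loads and stores whose pointer operand is a GEP into the alloca qualify, while a store that stores the alloca's address as a value does not.

void accepted(float *Out, int I) {
  float Buf[4];
  Buf[I] = 1.0f; // store whose pointer operand is a GEP of Buf: handled
  *Out = Buf[I]; // load through a GEP of Buf: handled
}

void rejected(float **Sink) {
  float Buf[4];
  *Sink = &Buf[0]; // Buf's address is the stored *value*, not the pointer operand: rejected
}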
/// Try to fold load I.
bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) {
  Value *AddrOp = I.getPointerOperand();

  auto AddressIt = SimplifiedAddresses.find(AddrOp);
  if (AddressIt == SimplifiedAddresses.end())
    return false;
  ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset;

  auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
  // We're only interested in loads that can be completely folded to a
  // constant.
  if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
    return false;

  ConstantDataSequential *CDS =
      dyn_cast<ConstantDataSequential>(GV->getInitializer());
  if (!CDS)
    return false;

  // We might have a vector load from an array. FIXME: for now we just bail
  // out in this case, but we should be able to resolve and simplify such
  // loads.
  if (!CDS->isElementTypeCompatible(I.getType()))
    return false;

  int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
  if (SimplifiedAddrOp->getValue().getActiveBits() >= 64)
    return false;
  int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
  if (Index >= CDS->getNumElements()) {
    // FIXME: For now we conservatively ignore out of bound accesses, but
    // we're allowed to perform the optimization in this case.
    return false;
  }

  Constant *CV = CDS->getElementAsConstant(Index);
  assert(CV && "Constant expected.");
  SimplifiedValues[&I] = CV;

  return true;
}
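A hypothetical example of a load this analysis can fold (the table and function are invented): when the loop below is evaluated for full unrolling, each iteration's address simplifies to a constant offset from Table, the base is a constant global with a definitive ConstantDataArray initializer, and each load folds to the corresponding element.

static const int Table[4] = {10, 20, 30, 40};

int sumTable() {
  int Sum = 0;
  for (int I = 0; I < 4; ++I)
    Sum += Table[I]; // after simplification the loads fold to 10, 20, 30, 40
  return Sum;
}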
Example #22
void ExecutorUtil::checkLoadInst(Instruction *inst, 
                                 std::vector<GlobalSharedTaint> &glSet, 
                                 std::vector<GlobalSharedTaint> &sharedSet, 
                                 AliasAnalysis &AA, 
                                 RelFlowSet &flowSet) {
  bool relToShared = false;
  LoadInst *load = cast<LoadInst>(inst);
  Value *pointer = load->getPointerOperand();
 
  for (unsigned i = 0; i < sharedSet.size(); i++) {
    if (ExecutorUtil::findValueFromTaintSet(pointer, 
                                            sharedSet[i].instSet, 
                                            sharedSet[i].valueSet)) {
      // Related to shared
      if (Verbose > 0) {
        std::cout << "shared load inst: " << std::endl;
        inst->dump();
      }
      flowSet.sharedReadVec.insert(sharedSet[i].gv);
      relToShared = true;
      break;
    }
  }

  if (!relToShared) {
    for (unsigned i = 0; i < glSet.size(); i++) {
      if (ExecutorUtil::findValueFromTaintSet(pointer,
                                              glSet[i].instSet, 
                                              glSet[i].valueSet)) {
        // Related to global 
        flowSet.globalReadVec.insert(glSet[i].gv);
        if (Verbose > 0) {
          std::cout << "global load inst: " << std::endl;
          inst->dump();
        }
        break; 
      }
    }
  }
} 
Example #23
bool Scalarizer::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  VectorLayout Layout;
  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
    return false;

  unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(LI.getParent(), &LI);
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
  ValueVector Res;
  Res.resize(NumElems);

  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
                                       LI.getName() + ".i" + Twine(I));
  gather(&LI, Res);
  return true;
}
Example #24
PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();

  PointerOffsetPair POP;
  POP.Pointer = LI.getPointerOperand();
  unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
  POP.Offset = APInt(BitWidth, 0);

  while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
      APInt LastOffset = POP.Offset;
      if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
        // Can't handle GEPs with variable indices.
        POP.Offset = LastOffset;
        return POP;
      }
      POP.Pointer = GEP->getPointerOperand();
    } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
      POP.Pointer = BC->getOperand(0);
    }
  }
  return POP;
}
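For example (a hypothetical chain): if the load's pointer operand is a bitcast of a GEP that adds a constant 12 bytes to some base pointer, the loop strips the bitcast and accumulates 12 into POP.Offset via accumulateConstantOffset, returning the base pointer with Offset = 12; if a GEP has a variable index, the walk stops and returns whatever pointer and offset have been accumulated up to that point.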
// -- handle load instruction -- 
void UnsafeTypeCastingCheck::handleLoadInstruction (Instruction *inst) {
  LoadInst *linst = dyn_cast<LoadInst>(inst); 
  if (linst == NULL) 
    utccAbort("handleLoadInstruction cannot process with a non-load instruction"); 

  Value *pt = linst->getPointerOperand(); 
  
  // check if pointing to global value (NOTE: a global value is always a pointer) 
  if (ConstantExpr *cexpr = dyn_cast<ConstantExpr>(pt)) {
    if (cexpr->getOpcode() == Instruction::GetElementPtr &&
        cexpr->getNumOperands() >= 1) {
      GlobalValue *gpointer = dyn_cast<GlobalValue>(cexpr->getOperand(0)); 
      pt = gpointer; 
      UTCC_TYPE ptt = queryPointedType(pt); 
      setExprType(linst, ptt); 
      return; 
    }
  }

  // the final (default) handle 
  UTCC_TYPE ptt = queryPointedType(pt); 
  setExprType(linst, ptt); 
}
Example #26
void Lint::visitLoadInst(LoadInst &I) {
  visitMemoryReference(I, I.getPointerOperand(),
                       DL->getTypeStoreSize(I.getType()), I.getAlignment(),
                       I.getType(), MemRef::Read);
}
/// tryAggregating - When scanning forward over instructions, we look for
/// other loads or stores that could be aggregated with this one.
/// Returns the last instruction added (if one was added) since we might have
/// removed some loads or stores and that might invalidate an iterator.
Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr,
    bool DebugThis) {
  if (TD == 0) return 0;

  Module* M = StartInst->getParent()->getParent()->getParent();
  LLVMContext& Context = StartInst->getContext();

  Type* int8Ty = Type::getInt8Ty(Context);
  Type* sizeTy = Type::getInt64Ty(Context);
  Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace);
  bool isLoad = isa<LoadInst>(StartInst);
  bool isStore = isa<StoreInst>(StartInst);
  Instruction *lastAddedInsn = NULL;
  Instruction *LastLoadOrStore = NULL;
 
  SmallVector<Instruction*, 8> toRemove;

  // Okay, so we now have a single global load/store. Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemOpRanges Ranges(*TD);
 
  // Put the first store in since we want to preserve the order.
  Ranges.addInst(0, StartInst);

  BasicBlock::iterator BI = StartInst;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {

    if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) {
      // OK!
    } else {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory())
        break;
      if (isStore && BI->mayReadFromMemory())
        break;
      continue;
    }

    if ( isStore && isa<StoreInst>(BI) ) {
      StoreInst *NextStore = cast<StoreInst>(BI);
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
        break;

      Ranges.addStore(Offset, NextStore);
      LastLoadOrStore = NextStore;
    } else {
      LoadInst *NextLoad = cast<LoadInst>(BI);
      if (!NextLoad->isSimple()) break;

      // Check to see if this load is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD))
        break;

      Ranges.addLoad(Offset, NextLoad);
      LastLoadOrStore = NextLoad;
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (!Ranges.moreThanOneOp())
    return 0;

  // Divide the instructions between StartInst and LastLoadOrStore into
  // addressing, memops, and uses of memops (uses of loads)
  reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis);

  Instruction* insertBefore = StartInst;
  IRBuilder<> builder(insertBefore);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memcpy's for anything big enough to be worthwhile.
  for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemOpRange &Range = *I;
    Value* oldBaseI = NULL;
    Value* newBaseI = NULL;

    if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing...

    builder.SetInsertPoint(insertBefore);

    // Otherwise, we do want to transform this!  Create a new memcpy.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    if( DebugThis ) {
      errs() << "base is:";
      StartPtr->dump();
    }

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    Instruction *alloc = NULL;
    Value *globalPtr = NULL;

    // create temporary alloca space to communicate to/from.
    alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore,
                       Range.End-Range.Start, Alignment);

    // Generate the old and new base pointers before we output
    // anything else.
    {
      Type* iPtrTy = TD->getIntPtrType(alloc->getType());
      Type* iNewBaseTy = TD->getIntPtrType(alloc->getType());
      oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i");
      newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i");
    }

    // If storing, do the stores we had into our alloca'd region.
    if( isStore ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        StoreInst* oldStore = cast<StoreInst>(*SI);

        if( DebugThis ) {
          errs() << "have store in range:";
          oldStore->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // The old store must not be volatile or atomic... or we shouldn't have
        // put it in ranges
        assert(!(oldStore->isVolatile() || oldStore->isAtomic()));
        StoreInst* newStore =
          builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc);
        newStore->setAlignment(oldStore->getAlignment());
        newStore->takeName(oldStore);
      }
    }

    // cast the pointer that was load/stored to i8 if necessary.
    if( StartPtr->getType()->getPointerElementType() == int8Ty ) {
      globalPtr = StartPtr;
    } else {
      globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast");
    }

    // Get a Constant* for the length.
    Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false);

    // Now add the memcpy instruction
    unsigned addrSpaceDst,addrSpaceSrc;
    addrSpaceDst = addrSpaceSrc = 0;
    if( isStore ) addrSpaceDst = globalSpace;
    if( isLoad ) addrSpaceSrc = globalSpace;

    Type *types[3];
    types[0] = PointerType::get(int8Ty, addrSpaceDst);
    types[1] = PointerType::get(int8Ty, addrSpaceSrc);
    types[2] = sizeTy;

    Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types);

    Value* args[5]; // dst src len alignment isvolatile
    if( isStore ) {
      // it's a store (ie put)
      args[0] = globalPtr;
      args[1] = alloc;
    } else {
      // it's a load (ie get)
      args[0] = alloc;
      args[1] = globalPtr;
    }
    args[2] = len;
    // alignment
    args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false);
    // isvolatile
    args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false);

    Instruction* aMemCpy = builder.CreateCall(func, args);

    /*
    DEBUG(dbgs() << "Replace ops:\n";
      for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
        dbgs() << *Range.TheStores[i] << '\n';
      dbgs() << "With: " << *AMemSet << '\n');
      */

    if (!Range.TheStores.empty())
      aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    lastAddedInsn = aMemCpy;

    // If loading, load from the memcpy'd region
    if( isLoad ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        LoadInst* oldLoad = cast<LoadInst>(*SI);
        if( DebugThis ) {
          errs() << "have load in range:";
          oldLoad->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // Old load must not be volatile or atomic... or we shouldn't have put
        // it in ranges
        assert(!(oldLoad->isVolatile() || oldLoad->isAtomic()));
        LoadInst* newLoad = builder.CreateLoad(ptrToAlloc);
        newLoad->setAlignment(oldLoad->getAlignment());
        oldLoad->replaceAllUsesWith(newLoad);
        newLoad->takeName(oldLoad);
        lastAddedInsn = newLoad;
      }
    }

    // Save old loads/stores for removal
    for (SmallVector<Instruction*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI) {
      Instruction* insn = *SI;
      toRemove.push_back(insn);
    }
  }

  // Zap all the old loads/stores
  for (SmallVector<Instruction*, 16>::const_iterator
       SI = toRemove.begin(),
       SE = toRemove.end(); SI != SE; ++SI) {
    (*SI)->eraseFromParent();
  }

  return lastAddedInsn;
}
Example #28
/// isSafeToPromoteArgument - As you might guess from the name of this method,
/// it checks to see if it is both safe and useful to promote the argument.
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
  typedef std::set<IndicesVector> GEPIndicesSet;

  // Quick exit for unused arguments
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of the uses are loads, or are GEP
  // instructions (with constant indices) that are subsequently loaded.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway, in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
  //
  // This set will contain all sets of indices that are loaded in the entry
  // block, and thus are safe to unconditionally load in the caller.
  GEPIndicesSet SafeToUnconditionallyLoad;

  // This set contains all the sets of indices that we are planning to promote.
  // This makes it possible to limit the number of arguments added.
  GEPIndicesSet ToPromote;

  // If the pointer is always valid, any load with first index 0 is valid.
  if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));

  // First, iterate the entry block and mark loads of (geps of) arguments as
  // safe.
  BasicBlock *EntryBlock = Arg->getParent()->begin();
  // Declare this here so we can reuse it
  IndicesVector Indices;
  for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
       I != E; ++I)
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      Value *V = LI->getPointerOperand();
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
        V = GEP->getPointerOperand();
        if (V == Arg) {
          // This load actually loads (part of) Arg? Check the indices then.
          Indices.reserve(GEP->getNumIndices());
          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
               II != IE; ++II)
            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
              Indices.push_back(CI->getSExtValue());
            else
              // We found a non-constant GEP index for this argument? Bail out
              // right away, can't promote this argument at all.
              return false;

          // Indices checked out, mark them as safe
          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
          Indices.clear();
        }
      } else if (V == Arg) {
        // Direct loads are equivalent to a GEP with a single 0 index.
        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
      }
    }

  // Now, iterate all uses of the argument to see if there are any uses that are
  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
  SmallVector<LoadInst*, 16> Loads;
  IndicesVector Operands;
  for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    Operands.clear();
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (LI->isVolatile()) return false;  // Don't hack volatile loads
      Loads.push_back(LI);
      // Direct loads are equivalent to a GEP with a zero index and then a load.
      Operands.push_back(0);
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      if (GEP->use_empty()) {
        // Dead GEP's cause trouble later.  Just remove them if we run into
        // them.
        getAnalysis<AliasAnalysis>().deleteValue(GEP);
        GEP->eraseFromParent();
        // TODO: This runs the above loop over and over again for dead GEPs.
        // Couldn't we just increment the UI iterator earlier and erase the
        // use?
        return isSafeToPromoteArgument(Arg, isByVal);
      }

      // Ensure that all of the indices are constants.
      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
        i != e; ++i)
        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
          Operands.push_back(C->getSExtValue());
        else
          return false;  // Not a constant operand GEP!

      // Ensure that the only users of the GEP are load instructions.
      for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
           UI != E; ++UI)
        if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
          if (LI->isVolatile()) return false;  // Don't hack volatile loads
          Loads.push_back(LI);
        } else {
          // Other uses than load?
          return false;
        }
    } else {
      return false;  // Not a load or a GEP.
    }

    // Now, see if it is safe to promote this load / loads of this GEP. Loading
    // is safe if Operands, or a prefix of Operands, is marked as safe.
    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
      return false;

    // See if we are already promoting a load with these indices. If not, check
    // to make sure that we aren't promoting too many elements.  If so, nothing
    // to do.
    if (ToPromote.find(Operands) == ToPromote.end()) {
      if (maxElements > 0 && ToPromote.size() == maxElements) {
        DEBUG(dbgs() << "argpromotion not promoting argument '"
              << Arg->getName() << "' because it would require adding more "
              << "than " << maxElements << " arguments to the function.\n");
        // We limit aggregate promotion to only promoting up to a fixed number
        // of elements of the aggregate.
        return false;
      }
      ToPromote.insert(Operands);
    }
  }

  if (Loads.empty()) return true;  // No users, this is a dead argument.

  // Okay, now we know that the argument is only used by load instructions and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  SmallPtrSet<BasicBlock*, 16> TranspBlocks;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false; // Without TargetData, assume the worst.

  for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    LoadInst *Load = Loads[i];
    BasicBlock *BB = Load->getParent();

    const PointerType *LoadTy =
      cast<PointerType>(Load->getPointerOperand()->getType());
    unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());

    if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
      return false;  // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;
      for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
             I = idf_ext_begin(P, TranspBlocks),
             E = idf_ext_end(P, TranspBlocks); I != E; ++I)
        if (AA.canBasicBlockModify(**I, Arg, LoadSize))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}
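A hypothetical callee of the shape this check accepts (struct and names invented): every use of P is a constant-index GEP that is immediately loaded, both loads happen unconditionally in the entry block, and only two elements are touched, so promoting P to two scalar arguments cannot introduce a load the original program would not have executed.

struct Pair { int A; int B; };

static int sumPair(const Pair *P) { // candidate: promote to sumPair(int, int)
  return P->A + P->B;               // loads of constant-offset GEPs of P
}

int callSumPair() {
  Pair Tmp = {1, 2};
  return sumPair(&Tmp);
}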
Example #29
void Lint::visitLoadInst(LoadInst &I) {
  visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType());
}
Example #30
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= !!dyn_cast<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // Must have a single use.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (I && (I->getNumUses() > 1 || I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // If all operands are constants, or all are the same scalar, just scalarize.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList LHSVL, RHSVL;
    for (int i = 0; i < VF; ++i) {
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
    }

    Value *RHS = vectorizeTree(RHSVL, VF);
    Value *LHS = vectorizeTree(LHSVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
    return Builder.CreateBinOp(BinOp->getOpcode(), RHS, LHS);
  }
  case Instruction::Load: {
    LoadInst *LI = dyn_cast<LoadInst>(VL0);
    unsigned Alignment = LI->getAlignment();

    // Check if all of the loads are consecutive.
    for (unsigned i = 1, e = VF; i < e; ++i)
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
        return Scalarize(VL, VecTy);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                          VecTy->getPointerTo());
    LI = Builder.CreateLoad(VecPtr);
    LI->setAlignment(Alignment);
    return LI;
  }
  case Instruction::Store: {
    StoreInst *SI = dyn_cast<StoreInst>(VL0);
    unsigned Alignment = SI->getAlignment();

    ValueList ValueOp;
    for (int i = 0; i < VF; ++i)
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

    Value *VecValue = vectorizeTree(ValueOp, VF);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                          VecTy->getPointerTo());
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

    for (int i = 0; i < VF; ++i)
      cast<Instruction>(VL[i])->eraseFromParent();
    return 0;
  }
  default:
    return Scalarize(VL, VecTy);
  }
}
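A hypothetical scalar pattern the Load and Store cases above combine (assuming isConsecutiveAccess proves the four accesses adjacent): the four loads from A and from B each become one vector load, the adds become one vector add, and the four stores to C become one vector store.

void add4(const int *A, const int *B, int *C) {
  C[0] = A[0] + B[0];
  C[1] = A[1] + B[1];
  C[2] = A[2] + B[2];
  C[3] = A[3] + B[3];
}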