static bool hasPrivateLoadStore(Loop *L) {
  const std::vector<Loop*> subLoops = L->getSubLoops();
  std::set<BasicBlock*> subBlocks, blocks;

  // Gather the blocks owned by sub-loops, then keep only the blocks that
  // belong to L itself.
  for (auto l : subLoops)
    for (auto bb : l->getBlocks())
      subBlocks.insert(bb);
  for (auto bb : L->getBlocks())
    if (subBlocks.find(bb) == subBlocks.end())
      blocks.insert(bb);

  // Look for a load or store whose pointer is in address space 0, which this
  // helper treats as private.
  for (auto bb : blocks) {
    for (Instruction &inst : *bb) {
      unsigned addrSpace = -1;
      if (auto *ld = dyn_cast<LoadInst>(&inst))
        addrSpace = ld->getPointerAddressSpace();
      else if (auto *st = dyn_cast<StoreInst>(&inst))
        addrSpace = st->getPointerAddressSpace();
      if (addrSpace == 0)
        return true;
    }
  }
  return false;
}
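A minimal call-site sketch for the helper above; the driver function and its behavior are assumptions for illustration, not part of the original pass:

#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// Hypothetical driver: leave any top-level loop that touches private
// (address space 0) memory alone.
static void processLoops(LoopInfo &LI) {
  for (Loop *L : LI)
    if (!hasPrivateLoadStore(L)) {
      // ... safe to apply the transformation to L ...
    }
}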
bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast = Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}
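A self-contained sketch of the same widen-then-narrow pattern with the AMDGPU-specific address-space checks and range-metadata handling stripped out; the function name is hypothetical and the code is written against a newer IRBuilder whose CreateLoad takes the result type explicitly:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static Value *widenSubDwordLoad(LoadInst &I) {
  IRBuilder<> B(&I);
  Type *I32Ty = B.getInt32Ty();
  // Reinterpret the pointer as i32* in the original address space and load
  // the full dword.
  Value *Cast = B.CreateBitCast(
      I.getPointerOperand(),
      PointerType::get(I32Ty, I.getPointerAddressSpace()));
  Value *Wide = B.CreateLoad(I32Ty, Cast);
  // Narrow back to the original width, then bitcast to the original type
  // (e.g. i32 -> i16 -> half).
  unsigned Bits = I.getModule()->getDataLayout().getTypeSizeInBits(I.getType());
  Value *Narrow = B.CreateTrunc(Wide, B.getIntNTy(Bits));
  return B.CreateBitCast(Narrow, I.getType());
}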
void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) {
    unsigned AS = LI.getPointerAddressSpace();
    if (!isSpecialAS(AS))
        return;
    Value *Replacement = LiftPointer(LI.getPointerOperand(), LI.getType(), &LI);
    if (!Replacement)
        return;
    LI.setOperand(LoadInst::getPointerOperandIndex(), Replacement);
}
void X86InterleavedAccessGroup::decompose(
    Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
    SmallVectorImpl<Instruction *> &DecomposedVectors) {

  assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
         "Expected Load or Shuffle");

  Type *VecTy = VecInst->getType();
  (void)VecTy;
  assert(VecTy->isVectorTy() &&
         DL.getTypeSizeInBits(VecTy) >=
             DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
         "Invalid Inst-size!!!");

  if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
    Value *Op0 = SVI->getOperand(0);
    Value *Op1 = SVI->getOperand(1);

    // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type.
    for (unsigned i = 0; i < NumSubVectors; ++i)
      DecomposedVectors.push_back(
          cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
              Op0, Op1,
              createSequentialMask(Builder, Indices[i],
                                   SubVecTy->getVectorNumElements(), 0))));
    return;
  }

  // Decompose the load instruction.
  LoadInst *LI = cast<LoadInst>(VecInst);
  Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace());
  Value *VecBasePtr;
  unsigned int NumLoads = NumSubVectors;
  // In the case of stride 3 with a vector of 32 elements load the information
  // in the following way:
  // [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
  if (DL.getTypeSizeInBits(VecTy) == 768) {
    Type *VecTran =
        VectorType::get(Type::getInt8Ty(LI->getContext()), 16)->getPointerTo();
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTran);
    NumLoads = NumSubVectors * 2;
  } else
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
  // Generate N loads of T type.
  for (unsigned i = 0; i < NumLoads; i++) {
    // TODO: Support inbounds GEP.
    Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i));
    Instruction *NewLoad =
        Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment());
    DecomposedVectors.push_back(NewLoad);
  }
}
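The 768 in the branch above is not arbitrary: assuming i8 elements at stride 3 with VF = 32, the wide vector is <96 x i8> (768 bits), each sub-vector is <32 x i8>, and the branch issues NumSubVectors * 2 = 6 loads of <16 x i8> (128 bits) each. A compile-time restatement of that arithmetic, for illustration only:

constexpr unsigned VF = 32, Stride = 3, ElemBits = 8;   // assumed i8 elements
constexpr unsigned WideBits = VF * Stride * ElemBits;   // 768
constexpr unsigned PieceBits = 16 * ElemBits;           // one <16 x i8> load
constexpr unsigned NumLoads = Stride * 2;               // NumSubVectors * 2
static_assert(WideBits == 768, "the special-cased width");
static_assert(NumLoads * PieceBits == WideBits, "pieces cover the wide vector");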
Example #5
PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();

  PointerOffsetPair POP;
  POP.Pointer = LI.getPointerOperand();
  unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
  POP.Offset = APInt(BitWidth, 0);

  while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
      APInt LastOffset = POP.Offset;
      if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
        // Can't handle GEPs with variable indices.
        POP.Offset = LastOffset;
        return POP;
      }
      POP.Pointer = GEP->getPointerOperand();
    } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
      POP.Pointer = BC->getOperand(0);
    }
  }
  return POP;
}
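A hedged sketch of what the pair buys us: two loads are merge candidates when they decompose to the same base pointer and abutting offsets. The helper below is an illustration that assumes access to getPointerOffsetPair, not LoadCombine's actual adjacency test:

static bool areAdjacent(LoadInst &A, LoadInst &B, LoadCombine &LC) {
  PointerOffsetPair PA = LC.getPointerOffsetPair(A);
  PointerOffsetPair PB = LC.getPointerOffsetPair(B);
  if (PA.Pointer != PB.Pointer)
    return false; // different (or unprovably equal) base pointers
  const DataLayout &DL = A.getModule()->getDataLayout();
  APInt SizeA(PA.Offset.getBitWidth(), DL.getTypeStoreSize(A.getType()));
  return PA.Offset + SizeA == PB.Offset; // B starts exactly where A ends
}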
Example #6
unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
  if (!VTTI)
    return -1;

  switch (I->getOpcode()) {
  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return VTTI->getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType());
  }
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
  }
  case Instruction::Store: {
    StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
                                 SI->getAlignment(),
                                 SI->getPointerAddressSpace());
  }
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(I);
    return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
                                 LI->getAlignment(),
                                 LI->getPointerAddressSpace());
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
  }
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return VTTI->getVectorInstrCost(I->getOpcode(),
                                    EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return VTTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx);
  }
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
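A usage sketch that folds the per-instruction costs into a whole-function estimate, treating -1 as "unknown" exactly as the switch above does; the pass-manager plumbing that populates the analysis is assumed, not shown:

static unsigned estimateFunctionCost(Function &F, const CostModelAnalysis &CM) {
  unsigned Total = 0;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB) {
      unsigned Cost = CM.getInstructionCost(&I);
      if (Cost != (unsigned)-1) // skip instructions with no cost info
        Total += Cost;
    }
  return Total;
}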
Example #7
LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
  // See if we already have a value for this block.
  LVILatticeVal BBLV = getCachedEntryForBlock(BB);
  
  // If we've already computed this block's value, return it.
  if (!BBLV.isUndefined()) {
    DEBUG(dbgs() << "  reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
    return BBLV;
  }

  // Otherwise, this is the first time we're seeing this block.  Reset the
  // lattice value to overdefined, so that cycles will terminate and be
  // conservatively correct.
  BBLV.markOverdefined();
  Cache[BB] = BBLV;
  
  Instruction *BBI = dyn_cast<Instruction>(Val);
  if (BBI == 0 || BBI->getParent() != BB) {
    LVILatticeVal Result;  // Start Undefined.
    
    // If this is a pointer, and there's a load from that pointer in this BB,
    // then we know that the pointer can't be NULL.
    bool NotNull = false;
    if (Val->getType()->isPointerTy()) {
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
        LoadInst *L = dyn_cast<LoadInst>(BI);
        if (L && L->getPointerAddressSpace() == 0 &&
            L->getPointerOperand()->getUnderlyingObject() ==
              Val->getUnderlyingObject()) {
          NotNull = true;
          break;
        }
      }
    }
    
    unsigned NumPreds = 0;    
    // Loop over all of our predecessors, merging what we know from them into
    // result.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      Result.mergeIn(getEdgeValue(*PI, BB));
      
      // If we hit overdefined, exit early.  The BlockVals entry is already set
      // to overdefined.
      if (Result.isOverdefined()) {
        DEBUG(dbgs() << " compute BB '" << BB->getName()
                     << "' - overdefined because of pred.\n");
        // If we previously determined that this is a pointer that can't be null
        // then return that rather than giving up entirely.
        if (NotNull) {
          const PointerType *PTy = cast<PointerType>(Val->getType());
          Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
        }
        
        return Result;
      }
      ++NumPreds;
    }
    
    
    // If this is the entry block, we must be asking about an argument.  The
    // value is overdefined.
    if (NumPreds == 0 && BB == &BB->getParent()->front()) {
      assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
      Result.markOverdefined();
      return Result;
    }
    
    // Return the merged value, which is more precise than 'overdefined'.
    assert(!Result.isOverdefined());
    return Cache[BB] = Result;
  }
  
  // If this value is defined by an instruction in this block, we have to
  // process it here somehow or return overdefined.
  if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
    LVILatticeVal Result;  // Start Undefined.
    
    // Loop over all of our predecessors, merging what we know from them into
    // result.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      Value* PhiVal = PN->getIncomingValueForBlock(*PI);
      Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB));
      
      // If we hit overdefined, exit early.  The BlockVals entry is already set
      // to overdefined.
      if (Result.isOverdefined()) {
        DEBUG(dbgs() << " compute BB '" << BB->getName()
                     << "' - overdefined because of pred.\n");
        return Result;
      }
    }
    
    // Return the merged value, which is more precise than 'overdefined'.
    assert(!Result.isOverdefined());
    return Cache[BB] = Result;
  }

  assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?");

  // We can only analyze the definitions of certain classes of instructions
  // (integral binops and casts at the moment), so bail if this isn't one.
  LVILatticeVal Result;
  if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
     !BBI->getType()->isIntegerTy()) {
    DEBUG(dbgs() << " compute BB '" << BB->getName()
                 << "' - overdefined because inst def found.\n");
    Result.markOverdefined();
    return Result;
  }
   
  // FIXME: We're currently limited to binops with a constant RHS.  This should
  // be improved.
  BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
  if (BO && !isa<ConstantInt>(BO->getOperand(1))) { 
    DEBUG(dbgs() << " compute BB '" << BB->getName()
                 << "' - overdefined because inst def found.\n");

    Result.markOverdefined();
    return Result;
  }  

  // Figure out the range of the LHS.  If that fails, bail.
  LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB);
  if (!LHSVal.isConstantRange()) {
    Result.markOverdefined();
    return Result;
  }
  
  ConstantInt *RHS = 0;
  ConstantRange LHSRange = LHSVal.getConstantRange();
  ConstantRange RHSRange(1);
  const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
  if (isa<BinaryOperator>(BBI)) {
    RHS = dyn_cast<ConstantInt>(BBI->getOperand(1));
    if (!RHS) {
      Result.markOverdefined();
      return Result;
    }
    
    RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1);
  }
      
  // NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
  // more definitions.
  switch (BBI->getOpcode()) {
  case Instruction::Add:
    Result.markConstantRange(LHSRange.add(RHSRange));
    break;
  case Instruction::Sub:
    Result.markConstantRange(LHSRange.sub(RHSRange));
    break;
  case Instruction::Mul:
    Result.markConstantRange(LHSRange.multiply(RHSRange));
    break;
  case Instruction::UDiv:
    Result.markConstantRange(LHSRange.udiv(RHSRange));
    break;
  case Instruction::Shl:
    Result.markConstantRange(LHSRange.shl(RHSRange));
    break;
  case Instruction::LShr:
    Result.markConstantRange(LHSRange.lshr(RHSRange));
    break;
  case Instruction::Trunc:
    Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
    break;
  case Instruction::SExt:
    Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
    break;
  case Instruction::ZExt:
    Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
    break;
  case Instruction::BitCast:
    Result.markConstantRange(LHSRange);
    break;
  case Instruction::And:
    Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
    break;
  case Instruction::Or:
    Result.markConstantRange(LHSRange.binaryOr(RHSRange));
    break;
  
  // Unhandled instructions are overdefined.
  default:
    DEBUG(dbgs() << " compute BB '" << BB->getName()
                 << "' - overdefined because inst def found.\n");
    Result.markOverdefined();
    break;
  }
  
  return Cache[BB] = Result;
}
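The switch leans entirely on ConstantRange's symbolic arithmetic. A standalone illustration with assumed values (header path per current LLVM), showing the half-open [lo, hi) convention that the ConstantRange(RHS, RHS+1) construction above also uses:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include <cassert>
using namespace llvm;

void constantRangeDemo() {
  ConstantRange LHS(APInt(32, 2), APInt(32, 5));   // {2, 3, 4}
  ConstantRange RHS(APInt(32, 10), APInt(32, 11)); // the singleton {10}
  ConstantRange Sum = LHS.add(RHS);                // [12, 15) = {12, 13, 14}
  assert(Sum.getLower() == 12 && Sum.getUpper() == 15);
}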
Example #8
int qdp_jit_vec::vectorize_loads( std::vector<std::vector<Instruction*> >& load_instructions )
{
  DEBUG(dbgs() << "Vectorize loads, total of " << load_instructions.size() << "\n");

  //std::vector<std::pair<Value*,Value*> > scalar_vector_loads;
  scalar_vector_pairs.clear();

  if (load_instructions.empty())
    return 0;

  int load_vec_elem = 0;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    DEBUG(dbgs() << "Processing vector of loads number " << load_vec_elem++ << "\n");
    assert( VI.size() == vec_len && "length of vector of loads does not match vec_len" );
    bool loads_consec = true;
    uint64_t lo, hi;
    bool first = true;
    for( Instruction* I : VI ) {
      GetElementPtrInst* GEP;
      if ((GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0)))) {
	if (first) {
	  ConstantInt * CI;
	  if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
	    lo = CI->getZExtValue();
	    hi = lo+1;
	    first=false;
	  } else {
	    DEBUG(dbgs() << "First load in the chain: Operand of GEP not a ConstantInt" << *GEP->getOperand(1) << "\n");
	    assert( 0 && "First load in the chain: Operand of GEP not a ConstantInt\n");
	    exit(0);
	  }
	} else {
	  ConstantInt * CI;
	  if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
	    if (hi != CI->getZExtValue()) {
	      DEBUG(dbgs() << "Loads not consecutive lo=" << lo << " hi=" << hi << " this=" << CI->getZExtValue() << "\n");
	      loads_consec = false;
	    } else {
	      hi++;
	    }
	  }
	}
      } else {
	DEBUG(dbgs() << "Operand of load not a GEP " << *I->getOperand(0) << "\n");
	assert( 0 && "Operand of load not a GEP" );
	exit(0);
	loads_consec = false;
      }
    }
    if (loads_consec) {
      DEBUG(dbgs() << "Loads consecuetive\n");

      LoadInst* LI = cast<LoadInst>(VI.at(0));
      GetElementPtrInst* GEP = cast<GetElementPtrInst>(LI->getOperand(0));
      Instruction* GEPcl = clone_with_operands(GEP);
      unsigned AS = LI->getPointerAddressSpace();
      VectorType *VecTy = VectorType::get( LI->getType() , vec_len );

      unsigned bitwidth = LI->getType()->getPrimitiveSizeInBits();
      unsigned bytewidth = bitwidth == 1 ? 1 : bitwidth/8;
      DEBUG(dbgs() << "bit/byte width of load instr trype: " << bitwidth << "/" << bytewidth << "\n");
 
      //Builder->SetInsertPoint( GEP );
      Value *VecPtr = Builder->CreateBitCast(GEPcl,VecTy->getPointerTo(AS));
      //Value *VecLoad = Builder->CreateLoad( VecPtr );
      
      unsigned align = lo % vec_len == 0 ? bytewidth * vec_len : bytewidth;
      Value *VecLoad = Builder->CreateAlignedLoad( VecPtr , align );

      //DEBUG(dbgs() << "created vector load: " << *VecLoad << "\n");
      //function->dump();

      // unsigned AS = LI->getPointerAddressSpace();
      // VectorType *VecTy = VectorType::get( LI->getType() , vec_len );
      // Builder->SetInsertPoint( LI );
      // Value *VecPtr = Builder->CreateBitCast(LI->getPointerOperand(),VecTy->getPointerTo(AS));
      // Value *VecLoad = Builder->CreateLoad( VecPtr );

      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , VecLoad ) );
    } else {
      DEBUG(dbgs() << "Loads not consecutive:\n");
      for (Value* V: VI) {
	DEBUG(dbgs() << *V << "\n");
      }

      //Instruction* I = dyn_cast<Instruction>(VI.back()->getNextNode());
      //DEBUG(dbgs() << *I << "\n");

      //Builder->SetInsertPoint( VI.at(0) );


      std::vector<Instruction*> VIcl;
      for( Instruction* I : VI ) {
	VIcl.push_back( clone_with_operands(I) );
      }

      VectorType *VecTy = VectorType::get( VI.at(0)->getType() , vec_len );
      Value *Vec = UndefValue::get(VecTy);

      int i=0;
      for( Instruction* I : VIcl ) {
	Vec = Builder->CreateInsertElement(Vec, I, Builder->getInt32(i++));
      }

      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , Vec ) );      
    }
  }

  //vectorize_all_uses( scalar_vector_loads );

  DEBUG(dbgs() << "Searching for the stores:\n");
  //function->dump();

  //
  // Vectorize all StoreInst reachable by the first load of each vector of loads
  //
  {
    SetVector<Instruction*> to_visit;
    SetVector<Instruction*> stores_processed;
    for( std::vector<Instruction*>& VI : load_instructions ) {
      to_visit.insert(VI.at(0));
    }
    while (!to_visit.empty()) {
      Instruction* I = to_visit.back();
      to_visit.pop_back();
      DEBUG(dbgs() << "visiting " << *I << "\n");
      if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
	if (!stores_processed.count(SI)) {
	  get_vector_version( SI );
	  stores_processed.insert( SI );
	}
      } else {
	for (Use &U : I->uses()) {
	  Value* V = U.getUser();
	  to_visit.insert(cast<Instruction>(V));
	}
      }
    }
  }

  // DEBUG(dbgs() << "After vectorizing the stores\n");
  // function->dump();

  //
  // Mark all stores as being processed
  //
  SetVector<Instruction*> to_visit;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    for( Instruction* I : VI ) {
      to_visit.insert(I);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0))) {
	for_erasure.insert(GEP);
      }
    }
  }
  while (!to_visit.empty()) {
    Instruction* I = to_visit.back();
    to_visit.pop_back();
    for_erasure.insert(I);
    if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
      stores_processed.insert(SI);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(SI->getOperand(1))) {
	for_erasure.insert(GEP);
      }
    } else {
      for (Use &U : I->uses()) {
	Value* V = U.getUser();
	to_visit.insert(cast<Instruction>(V));
      }
    }
  }
  
  DEBUG(dbgs() << "----------------------------------------\n");
  DEBUG(dbgs() << "After vectorize_loads\n");
  //function->dump();

  return 0;
}
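The consecutiveness scan at the top of this function distils to one invariant on the collected GEP indices: they must form the run lo, lo+1, ..., lo+vec_len-1. A compact restatement as a hypothetical free function:

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>

static bool indicesConsecutive(llvm::ArrayRef<uint64_t> Idx) {
  for (size_t i = 1; i < Idx.size(); ++i)
    if (Idx[i] != Idx[i - 1] + 1)
      return false; // a gap or repeat breaks the run
  return true; // empty or singleton runs are trivially consecutive
}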