// Instrument memset/memmove/memcpy
bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
                                              MemIntrinsic *MI) {
  Value *Dst = MI->getDest();
  MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
  Value *Src = MemTran ? MemTran->getSource() : 0;
  Value *Length = MI->getLength();

  Constant *ConstLength = dyn_cast<Constant>(Length);
  Instruction *InsertBefore = MI;
  if (ConstLength) {
    if (ConstLength->isNullValue()) return false;
  } else {
    // The size is not a constant so it could be zero -- check at run-time.
    IRBuilder<> IRB(InsertBefore);

    Value *Cmp = IRB.CreateICmpNE(Length,
                                  Constant::getNullValue(Length->getType()));
    InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
  }

  instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
  if (Src)
    instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);
  return true;
}
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads, they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
        // doesn't, it does.
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (auto CS = CallSite(I)) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (CS.isCallee(&U))
          continue;

        unsigned DataOpNo = CS.getDataOperandNo(&U);
        bool IsArgOperand = CS.isArgOperand(&U);

        // Inalloca arguments are clobbered by the call.
        if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (IsArgOperand && CS.isByValArgument(DataOpNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this is isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we already have seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe.  Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
  SmallVector<LoadInst *, 4> aggrLoads;
  SmallVector<MemTransferInst *, 4> aggrMemcpys;
  SmallVector<MemSetInst *, 4> aggrMemsets;

  DataLayout *TD = &getAnalysis<DataLayout>();
  LLVMContext &Context = F.getParent()->getContext();

  //
  // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
  //
  //const BasicBlock *firstBB = &F.front();  // first BB in F
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    //BasicBlock *bb = BI;
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
        ++II) {
      if (LoadInst * load = dyn_cast<LoadInst>(II)) {

        if (load->hasOneUse() == false) continue;

        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;

        User *use = *(load->use_begin());
        if (StoreInst * store = dyn_cast<StoreInst>(use)) {
          if (store->getOperand(0) != load) //getValueOperand
          continue;
          aggrLoads.push_back(load);
        }
      } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
        Value *len = intr->getLength();
        // If the number of elements being copied is greater
        // than MaxAggrCopySize, lower it to a loop
        if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemcpys.push_back(intr);
          }
        } else {
          // turn variable length memcpy/memmov into loop
          aggrMemcpys.push_back(intr);
        }
      } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
        Value *len = memsetintr->getLength();
        if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemsets.push_back(memsetintr);
          }
        } else {
          // turn variable length memset into loop
          aggrMemsets.push_back(memsetintr);
        }
      }
    }
  }
  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
      && (aggrMemsets.size() == 0)) return false;

  //
  // Do the transformation of an aggr load/copy/set to a loop
  //
  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
    LoadInst *load = aggrLoads[i];
    StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
    Value *srcAddr = load->getOperand(0);
    Value *dstAddr = store->getOperand(1);
    unsigned numLoads = TD->getTypeStoreSize(load->getType());
    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);

    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
                          store->isVolatile(), Context, F);

    store->eraseFromParent();
    load->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
    MemTransferInst *cpy = aggrMemcpys[i];
    Value *len = cpy->getLength();
    // llvm 2.7 version of memcpy does not have volatile
    // operand yet. So always making it non-volatile
    // optimistically, so that we don't see unnecessary
    // st.volatile in ptx
    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
                          false, Context, F);
    cpy->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
    MemSetInst *memsetinst = aggrMemsets[i];
    Value *len = memsetinst->getLength();
    Value *val = memsetinst->getValue();
    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
                        F);
    memsetinst->eraseFromParent();
  }

  return true;
}
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore non-volatile loads, they are always ok.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
                                          IsOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this is isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}