Example 1
  /// Return a random value with a known type.
  Value *getRandomValue(Type *Tp) {
    unsigned index = Ran->Rand();
    for (unsigned i=0; i<PT->size(); ++i) {
      Value *V = PT->at((index + i) % PT->size());
      if (V->getType() == Tp)
        return V;
    }

    // If the requested type was not found, generate a constant value.
    if (Tp->isIntegerTy()) {
      if (Ran->Rand() & 1)
        return ConstantInt::getAllOnesValue(Tp);
      return ConstantInt::getNullValue(Tp);
    } else if (Tp->isFloatingPointTy()) {
      if (Ran->Rand() & 1)
        return ConstantFP::getAllOnesValue(Tp);
      return ConstantFP::getNullValue(Tp);
    } else if (Tp->isVectorTy()) {
      VectorType *VTp = cast<VectorType>(Tp);

      std::vector<Constant*> TempValues;
      TempValues.reserve(VTp->getNumElements());
      for (unsigned i = 0; i < VTp->getNumElements(); ++i)
        TempValues.push_back(getRandomConstant(VTp->getScalarType()));

      ArrayRef<Constant*> VectorValue(TempValues);
      return ConstantVector::get(VectorValue);
    }

    return UndefValue::get(Tp);
  }
Example 2
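// Scalarize a bitcast between two vector types. When the element counts
// match, each lane is bitcast individually; otherwise one source element
// fans out to several destination elements (or several source elements are
// packed into one) through an intermediate vector type.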
bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!DstVT || !SrcVT)
    return false;

  unsigned DstNumElems = DstVT->getNumElements();
  unsigned SrcNumElems = SrcVT->getNumElements();
  IRBuilder<> Builder(BCI.getParent(), &BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    for (unsigned I = 0; I < DstNumElems; ++I)
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
                                     BCI.getName() + ".i" + Twine(I));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
    // individual elements to the destination.
    unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    unsigned ResI = 0;
    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
      Value *V = Op0[Op0I];
      Instruction *VI;
      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
      Scatterer Mid = scatter(&BCI, V);
      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
        Res[ResI++] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
    unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    unsigned Op0I = 0;
    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
      Value *V = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
                                        BCI.getName() + ".i" + Twine(ResI)
                                        + ".upto" + Twine(MidI));
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
                                        BCI.getName() + ".i" + Twine(ResI));
    }
  }
  gather(&BCI, Res);
  return true;
}
Example 3
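// Return true if V is known to be zero (for vectors, if any element is known
// zero or undef). Undef is conservatively assumed to be possibly zero;
// scalars are decided with computeKnownBits.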
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                   AssumptionCache *AC) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
    return Known.isZero();
  }

  // Per-component check doesn't work with zeroinitializer
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component
  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    if (isa<UndefValue>(Elem))
      return true;

    KnownBits Known = computeKnownBits(Elem, DL);
    if (Known.isZero())
      return true;
  }

  return false;
}
Example 4
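// Scalarize a GEP with a vector result: emit one scalar GEP per lane from
// the scattered base pointers and indices, propagating the inbounds flag.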
bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
  if (!VT)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumElems = VT->getNumElements();
  unsigned NumIndices = GEPI.getNumIndices();

  Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));

  SmallVector<Scatterer, 8> Ops;
  Ops.resize(NumIndices);
  for (unsigned I = 0; I < NumIndices; ++I)
    Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I) {
    SmallVector<Value *, 8> Indices;
    Indices.resize(NumIndices);
    for (unsigned J = 0; J < NumIndices; ++J)
      Indices[J] = Ops[J][I];
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
                               GEPI.getName() + ".i" + Twine(I));
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res);
  return true;
}
Example 5
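// Scalarize a vector select. A vector condition is split per lane, while a
// scalar condition is shared by every lane.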
bool Scalarizer::visitSelectInst(SelectInst &SI) {
  VectorType *VT = dyn_cast<VectorType>(SI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
  assert(Op1.size() == NumElems && "Mismatched select");
  assert(Op2.size() == NumElems && "Mismatched select");
  ValueVector Res;
  Res.resize(NumElems);

  if (SI.getOperand(0)->getType()->isVectorTy()) {
    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
    assert(Op0.size() == NumElems && "Mismatched select");
    for (unsigned I = 0; I < NumElems; ++I)
      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
                                    SI.getName() + ".i" + Twine(I));
  } else {
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < NumElems; ++I)
      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
                                    SI.getName() + ".i" + Twine(I));
  }
  gather(&SI, Res);
  return true;
}
Example 6
/// getEVT - Return the value type corresponding to the specified type.  This
/// returns all pointers as MVT::iPTR.  If HandleUnknown is true, unknown types
/// are returned as Other, otherwise they are invalid.
EVT EVT::getEVT(Type *Ty, bool HandleUnknown) {
  switch (Ty->getTypeID()) {
  default:
    return MVT::getVT(Ty, HandleUnknown);
  case Type::IntegerTyID:
    return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
  case Type::VectorTyID: {
    VectorType *VTy = cast<VectorType>(Ty);
    return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
                       VTy->getNumElements());
  }
  }
}
Example 7
/// \brief Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  unsigned Width = VTy->getNumElements();
  if (EltNo >= Width)  // Out of range access.
    return UndefValue::get(VTy->getElementType());

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  Value *Val = nullptr; Constant *Con = nullptr;
  if (match(V,
            llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
                                      llvm::PatternMatch::m_Constant(Con)))) {
    if (Con->getAggregateElement(EltNo)->isNullValue())
      return findScalarElement(Val, EltNo);
  }

  // Otherwise, we don't know.
  return nullptr;
}
Example 8
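// Return a vector containing elements [start_pos, start_pos + length) of
// src, built as a shufflevector with a constant mask; src is returned
// unchanged when the full vector is requested.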
Value* cg_extension::extract_elements( Value* src, size_t start_pos, size_t length )
{
	VectorType* vty = llvm::cast<VectorType>(src->getType());
	uint32_t elem_count = vty->getNumElements();
	if( start_pos == 0 && length == elem_count ){
		return src;
	}

	vector<int> indexes;
	indexes.resize( length, 0 );
	for ( size_t i_elem = 0; i_elem < length; ++i_elem ){
		indexes[i_elem] = static_cast<int>(i_elem + start_pos);
	}
	Value* mask = get_vector<int>( ArrayRef<int>(indexes) );
	return builder_->CreateShuffleVector( src, UndefValue::get( src->getType() ), mask );
}
Example 9
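// Scalarize a cast with a vector destination type by applying the same cast
// opcode to each scattered element.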
bool Scalarizer::visitCastInst(CastInst &CI) {
  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(CI.getParent(), &CI);
  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
  assert(Op0.size() == NumElems && "Mismatched cast");
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
                                CI.getName() + ".i" + Twine(I));
  gather(&CI, Res);
  return true;
}
Example 10
/// FindScalarElement - Given a vector and an element number, see if the scalar
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *PTy = cast<VectorType>(V->getType());
  unsigned Width = PTy->getNumElements();
  if (EltNo >= Width)  // Out of range access.
    return UndefValue::get(PTy->getElementType());

  if (isa<UndefValue>(V))
    return UndefValue::get(PTy->getElementType());
  if (isa<ConstantAggregateZero>(V))
    return Constant::getNullValue(PTy->getElementType());
  if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
    return CP->getOperand(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return FindScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth =
      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(PTy->getElementType());
    if (InEl < (int)LHSWidth)
      return FindScalarElement(SVI->getOperand(0), InEl);
    return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Otherwise, we don't know.
  return nullptr;
}
Example 11
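// Scalarize a two-operand vector instruction: the Split callback emits the
// equivalent scalar operation for each pair of lanes.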
bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
  VectorType *VT = dyn_cast<VectorType>(I.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(I.getParent(), &I);
  Scatterer Op0 = scatter(&I, I.getOperand(0));
  Scatterer Op1 = scatter(&I, I.getOperand(1));
  assert(Op0.size() == NumElems && "Mismatched binary operation");
  assert(Op1.size() == NumElems && "Mismatched binary operation");
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
                      I.getName() + ".i" + Twine(Elem));
  gather(&I, Res);
  return true;
}
Example 12
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  if (ST->hasVOP3PInsts()) {
    VectorType *VT = cast<VectorType>(Tp);
    if (VT->getNumElements() == 2 &&
        DL.getTypeSizeInBits(VT->getElementType()) == 16) {
      // With op_sel VOP3P instructions freely can access the low half or high
      // half of a register, so any swizzle is free.

      switch (Kind) {
      case TTI::SK_Broadcast:
      case TTI::SK_Reverse:
      case TTI::SK_PermuteSingleSrc:
        return 0;
      default:
        break;
      }
    }
  }

  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
Example 13
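// Scalarize a shufflevector by resolving each mask entry to the scattered
// source element it selects; negative (undef) entries become undef lanes.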
bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
  ValueVector Res;
  Res.resize(NumElems);

  for (unsigned I = 0; I < NumElems; ++I) {
    int Selector = SVI.getMaskValue(I);
    if (Selector < 0)
      Res[I] = UndefValue::get(VT->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      Res[I] = Op1[Selector - Op0.size()];
  }
  gather(&SVI, Res);
  return true;
}
Example 14
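// An older version of the isZero check in Example 3, written against the
// APInt-based computeKnownBits API: KnownZero.isAllOnesValue() means every
// bit of the value is known to be zero.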
static bool isZero(Value *V, const DataLayout *DL) {
    // Assume undef could be zero.
    if (isa<UndefValue>(V))
        return true;

    VectorType *VecTy = dyn_cast<VectorType>(V->getType());
    if (!VecTy) {
        unsigned BitWidth = V->getType()->getIntegerBitWidth();
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
        computeKnownBits(V, KnownZero, KnownOne, DL);
        return KnownZero.isAllOnesValue();
    }

    // Per-component check doesn't work with zeroinitializer
    Constant *C = dyn_cast<Constant>(V);
    if (!C)
        return false;

    if (C->isZeroValue())
        return true;

    // For a vector, KnownZero will only be true if all values are zero, so check
    // this per component
    unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
    for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
        Constant *Elem = C->getAggregateElement(I);
        if (isa<UndefValue>(Elem))
            return true;

        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
        computeKnownBits(Elem, KnownZero, KnownOne, DL);
        if (KnownZero.isAllOnesValue())
            return true;
    }

    return false;
}
Example 15
/// Return the value type corresponding to the specified type.  This returns all
/// pointers as MVT::iPTR.  If HandleUnknown is true, unknown types are returned
/// as Other, otherwise they are invalid.
MVT MVT::getVT(Type *Ty, bool HandleUnknown) {
  switch (Ty->getTypeID()) {
  default:
    if (HandleUnknown) return MVT(MVT::Other);
    llvm_unreachable("Unknown type!");
  case Type::VoidTyID:
    return MVT::isVoid;
  case Type::IntegerTyID:
    return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
  case Type::HalfTyID:      return MVT(MVT::f16);
  case Type::FloatTyID:     return MVT(MVT::f32);
  case Type::DoubleTyID:    return MVT(MVT::f64);
  case Type::X86_FP80TyID:  return MVT(MVT::f80);
  case Type::X86_MMXTyID:   return MVT(MVT::x86mmx);
  case Type::FP128TyID:     return MVT(MVT::f128);
  case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
  case Type::PointerTyID:   return MVT(MVT::iPTR);
  case Type::VectorTyID: {
    VectorType *VTy = cast<VectorType>(Ty);
    return getVectorVT(
      getVT(VTy->getElementType(), false), VTy->getNumElements());
  }
  }
}
Example 16
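// Scalarize a vector PHI: create one scalar PHI per lane, then add the
// scattered incoming values from each predecessor block.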
bool Scalarizer::visitPHINode(PHINode &PHI) {
  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(PHI.getParent(), &PHI);
  ValueVector Res;
  Res.resize(NumElems);

  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
                               PHI.getName() + ".i" + Twine(I));

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    for (unsigned J = 0; J < NumElems; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
  }
  gather(&PHI, Res);
  return true;
}
Example 17
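  // Pick a random value and insert a random cast of it: bitcast, trunc/ext,
  // fp<->int, or fptrunc/fpext, depending on the source and destination
  // types. Vector sources keep their element count.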
  virtual void Act() {

    Value *V = getRandomVal();
    Type *VTy = V->getType();
    Type *DestTy = pickScalarType();

    // Vector values are cast to vectors with the same element count.
    if (VTy->isVectorTy()) {
      VectorType *VecTy = cast<VectorType>(VTy);
      DestTy = pickVectorType(VecTy->getNumElements());
    }

    // No need to cast.
    if (VTy == DestTy) return;

    // Pointers:
    if (VTy->isPointerTy()) {
      if (!DestTy->isPointerTy())
        DestTy = PointerType::get(DestTy, 0);
      return PT->push_back(
        new BitCastInst(V, DestTy, "PC", BB->getTerminator()));
    }

    // Generate lots of bitcasts.
    if ((Ran->Rand() & 1) &&
        VTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      return PT->push_back(
        new BitCastInst(V, DestTy, "BC", BB->getTerminator()));
    }

    // Both types are integers:
    if (VTy->getScalarType()->isIntegerTy() &&
        DestTy->getScalarType()->isIntegerTy()) {
      if (VTy->getScalarType()->getPrimitiveSizeInBits() >
          DestTy->getScalarType()->getPrimitiveSizeInBits()) {
        return PT->push_back(
          new TruncInst(V, DestTy, "Tr", BB->getTerminator()));
      } else {
        if (Ran->Rand() & 1)
          return PT->push_back(
            new ZExtInst(V, DestTy, "ZE", BB->getTerminator()));
        return PT->push_back(new SExtInst(V, DestTy, "Se", BB->getTerminator()));
      }
    }

    // Fp to int.
    if (VTy->getScalarType()->isFloatingPointTy() &&
        DestTy->getScalarType()->isIntegerTy()) {
      if (Ran->Rand() & 1)
        return PT->push_back(
          new FPToSIInst(V, DestTy, "FC", BB->getTerminator()));
      return PT->push_back(new FPToUIInst(V, DestTy, "FC", BB->getTerminator()));
    }

    // Int to fp.
    if (VTy->getScalarType()->isIntegerTy() &&
        DestTy->getScalarType()->isFloatingPointTy()) {
      if (Ran->Rand() & 1)
        return PT->push_back(
          new SIToFPInst(V, DestTy, "FC", BB->getTerminator()));
      return PT->push_back(new UIToFPInst(V, DestTy, "FC", BB->getTerminator()));

    }

    // Both floats.
    if (VTy->getScalarType()->isFloatingPointTy() &&
        DestTy->getScalarType()->isFloatingPointTy()) {
      if (VTy->getScalarType()->getPrimitiveSizeInBits() >
          DestTy->getScalarType()->getPrimitiveSizeInBits()) {
        return PT->push_back(
          new FPTruncInst(V, DestTy, "Tr", BB->getTerminator()));
      } else {
        return PT->push_back(
          new FPExtInst(V, DestTy, "ZE", BB->getTerminator()));
      }
    }
  }
Example 18
/// WriteTypeTable - Write out the type table for a module.
static void WriteTypeTable(const NaClValueEnumerator &VE,
                           NaClBitstreamWriter &Stream) {
  DEBUG(dbgs() << "-> WriteTypeTable\n");
  const NaClValueEnumerator::TypeList &TypeList = VE.getTypes();

  Stream.EnterSubblock(naclbitc::TYPE_BLOCK_ID_NEW, TYPE_MAX_ABBREV);

  SmallVector<uint64_t, 64> TypeVals;

  // Abbrev for TYPE_CODE_FUNCTION.
  NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
  Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION));
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1));  // isvararg
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
  if (TYPE_FUNCTION_ABBREV != Stream.EmitAbbrev(Abbv))
    llvm_unreachable("Unexpected abbrev ordering!");

  // Emit an entry count so the reader can reserve space.
  TypeVals.push_back(TypeList.size());
  Stream.EmitRecord(naclbitc::TYPE_CODE_NUMENTRY, TypeVals);
  TypeVals.clear();

  // Loop over all of the types, emitting each in turn.
  for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
    Type *T = TypeList[i];
    int AbbrevToUse = 0;
    unsigned Code = 0;

    switch (T->getTypeID()) {
    default: llvm_unreachable("Unknown type!");
    case Type::VoidTyID:      Code = naclbitc::TYPE_CODE_VOID;      break;
    case Type::FloatTyID:     Code = naclbitc::TYPE_CODE_FLOAT;     break;
    case Type::DoubleTyID:    Code = naclbitc::TYPE_CODE_DOUBLE;    break;
    case Type::IntegerTyID:
      // INTEGER: [width]
      Code = naclbitc::TYPE_CODE_INTEGER;
      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
      break;
    case Type::VectorTyID: {
      VectorType *VT = cast<VectorType>(T);
      // VECTOR [numelts, eltty]
      Code = naclbitc::TYPE_CODE_VECTOR;
      TypeVals.push_back(VT->getNumElements());
      TypeVals.push_back(VE.getTypeID(VT->getElementType()));
      break;
    }
    case Type::FunctionTyID: {
      FunctionType *FT = cast<FunctionType>(T);
      // FUNCTION: [isvararg, retty, paramty x N]
      Code = naclbitc::TYPE_CODE_FUNCTION;
      TypeVals.push_back(FT->isVarArg());
      TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
      for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
        TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
      AbbrevToUse = TYPE_FUNCTION_ABBREV;
      break;
    }
    case Type::StructTyID:
      report_fatal_error("Struct types are not supported in PNaCl bitcode");
    case Type::ArrayTyID:
      report_fatal_error("Array types are not supported in PNaCl bitcode");
    }

    // Emit the finished record.
    Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
    TypeVals.clear();
  }

  Stream.ExitBlock();
  DEBUG(dbgs() << "<- WriteTypeTable\n");
}
Example 19
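// Replace uses of AMDGPU kernel arguments with explicit, invariant loads
// from the kernarg segment pointer, widening sub-dword arguments to aligned
// i32 loads and 3-element vectors to 4-element loads.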
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();

  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  BasicBlock &EntryBlock = *F.begin();
  IRBuilder<> Builder(&*EntryBlock.begin());

  const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);

  unsigned MaxAlign;
  // FIXME: Alignment is broken with explicit arg offset.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");

  KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
  KernArgSegment->addAttribute(AttributeList::ReturnIndex,
    Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

  unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
  uint64_t ExplicitArgOffset = 0;

  for (Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned Align = DL.getABITypeAlignment(ArgTy);
    unsigned Size = DL.getTypeSizeInBits(ArgTy);
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

    uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;

    if (Arg.use_empty())
      continue;

    if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }

    VectorType *VT = dyn_cast<VectorType>(ArgTy);
    bool IsV3 = VT && VT->getNumElements() == 3;
    bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    VectorType *V4Ty = nullptr;

    int64_t AlignDownOffset = alignDown(EltOffset, 4);
    int64_t OffsetDiff = EltOffset - AlignDownOffset;
    unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
                                      KernArgBaseAlign);

    Value *ArgPtr;
    Type *AdjustedArgTy;
    if (DoShiftOpt) { // FIXME: Handle aggregate types
      // Since we don't have sub-dword scalar loads, avoid doing an extload by
      // loading earlier than the argument address, and extracting the relevant
      // bits.
      //
      // Additionally widen any sub-dword load to i32 even if suitably aligned,
      // so that CSE between different argument loads works easily.
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          Arg.getName() + ".kernarg.offset.align.down");
      AdjustedArgTy = Builder.getInt32Ty();
    } else {
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".kernarg.offset");
      AdjustedArgTy = ArgTy;
    }

    if (IsV3 && Size >= 32) {
      V4Ty = VectorType::get(VT->getVectorElementType(), 4);
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
      AdjustedArgTy = V4Ty;
    }

    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
                                   ArgPtr->getName() + ".cast");
    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);

    if (isa<PointerType>(ArgTy)) {
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0) {
        Load->setMetadata(
          LLVMContext::MD_dereferenceable,
          MDNode::get(Ctx,
                      MDB.createConstant(
                        ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));
      }

      uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0) {
        Load->setMetadata(
          LLVMContext::MD_dereferenceable_or_null,
          MDNode::get(Ctx,
                      MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                          DerefOrNullBytes))));
      }

      unsigned ParamAlign = Arg.getParamAlignment();
      if (ParamAlign != 0) {
        Load->setMetadata(
          LLVMContext::MD_align,
          MDNode::get(Ctx,
                      MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                          ParamAlign))));
      }
    }

    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      Value *ExtractBits = OffsetDiff == 0 ?
        Load : Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
                                            Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
    } else if (IsV3) {
      Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
                                                {0, 1, 2},
                                                Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
    } else {
      Load->setName(Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Load);
    }
  }

  KernArgSegment->addAttribute(
    AttributeList::ReturnIndex,
    Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  return true;
}
Example 20
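// Append the bytes of the constant initializer CV to the little-endian
// buffer, recursing through aggregates and writing zero padding so that the
// output matches the DataLayout's struct and vector layout.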
void StoreInitializer::append(const Constant *CV, StringRef Var) {
  switch (CV->getValueID()) {
  case Value::ConstantArrayVal: { // Recursive type.
    const ConstantArray *CA = cast<ConstantArray>(CV);
    for (unsigned I = 0, E = CA->getNumOperands(); I < E; ++I)
      append(cast<Constant>(CA->getOperand(I)), Var);

    break;
  }
  case Value::ConstantDataArrayVal: {
    const ConstantDataArray *CVE = cast<ConstantDataArray>(CV);
    for (unsigned I = 0, E = CVE->getNumElements(); I < E; ++I)
      append(cast<Constant>(CVE->getElementAsConstant(I)), Var);

    break;
  }
  case Value::ConstantStructVal: { // Recursive type.
    const ConstantStruct *S = cast<ConstantStruct>(CV);
    StructType *ST = S->getType();
    const StructLayout *SL = DL.getStructLayout(ST);

    uint64_t StructSize = DL.getTypeAllocSize(ST);
    uint64_t BaseOffset = SL->getElementOffset(0);

    for (unsigned I = 0, E = S->getNumOperands(); I < E; ++I) {
      Constant *Elt = cast<Constant>(S->getOperand(I));
      append(Elt, Var);

      uint64_t EltSize = DL.getTypeAllocSize(Elt->getType());
      uint64_t EltOffset = SL->getElementOffset(I);

      uint64_t PaddedEltSize;
      if (I == E - 1)
        PaddedEltSize = BaseOffset + StructSize - EltOffset;
      else
        PaddedEltSize = SL->getElementOffset(I + 1) - EltOffset;

      // Match structure layout by padding with zeroes.
      while (EltSize < PaddedEltSize) {
        LE.write(static_cast<uint8_t>(0));
        ++EltSize;
      }
    }
    break;
  }
  case Value::ConstantVectorVal: { // Almost leaf type.
    const ConstantVector *CVE = cast<ConstantVector>(CV);
    VectorType *Ty = CVE->getType();
    Type *EltTy = Ty->getElementType();
    unsigned NElts = Ty->getNumElements();
    unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);

    unsigned I;
    for (I = 0; I < NElts; ++I)
      append(cast<Constant>(CVE->getOperand(I)), Var);

    Constant *Zero = Constant::getNullValue(EltTy);
    while (I < RealNElts) {
      append(Zero, Var);
      ++I;
    }

    break;
  }
  case Value::ConstantDataVectorVal: {
    const ConstantDataVector *CVE = cast<ConstantDataVector>(CV);
    VectorType *Ty = CVE->getType();
    Type *EltTy = Ty->getElementType();
    unsigned NElts = Ty->getNumElements();
    unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);

    unsigned I;
    for (I = 0; I < NElts; ++I)
      append(cast<Constant>(CVE->getElementAsConstant(I)), Var);

    Constant *Zero = Constant::getNullValue(EltTy);
    while (I < RealNElts) {
      append(Zero, Var);
      ++I;
    }

    break;
  }
  case Value::ConstantIntVal: {
    const ConstantInt *CI = cast<ConstantInt>(CV);
    if (CI->getType()->isIntegerTy(1)) {
      LE.write(static_cast<uint8_t>(CI->getZExtValue() ? 1 : 0));
    } else {
      switch (CI->getBitWidth()) {
      case 8:
        LE.write(static_cast<uint8_t>(CI->getZExtValue()));
        break;
      case 16:
        LE.write(static_cast<uint16_t>(CI->getZExtValue()));
        break;
      case 32:
        LE.write(static_cast<uint32_t>(CI->getZExtValue()));
        break;
      case 64:
        LE.write(static_cast<uint64_t>(CI->getZExtValue()));
        break;
      }
    }
    break;
  }
  case Value::ConstantFPVal: {
    const ConstantFP *CFP = cast<ConstantFP>(CV);
    if (CFP->getType()->isFloatTy())
      LE.write(CFP->getValueAPF().convertToFloat());
    else if (CFP->getType()->isDoubleTy())
      LE.write(CFP->getValueAPF().convertToDouble());
    else
      llvm_unreachable("unhandled ConstantFP type");
    break;
  }
  case Value::ConstantPointerNullVal: {
    unsigned AS = CV->getType()->getPointerAddressSpace();
    if (DL.getPointerSize(AS) == 8)
      LE.write(static_cast<uint64_t>(0));
    else
      LE.write(static_cast<uint32_t>(0));
    break;
  }
  case Value::UndefValueVal:
  case Value::ConstantAggregateZeroVal: {
    uint64_t Size = DL.getTypeAllocSize(CV->getType());
    for (uint64_t I = 0; I < Size / InitEltSize; ++I) {
      switch (InitEltSize) {
      case 1:
        LE.write(static_cast<uint8_t>(0));
        break;
      case 2:
        LE.write(static_cast<uint16_t>(0));
        break;
      case 4:
        LE.write(static_cast<uint32_t>(0));
        break;
      case 8:
        LE.write(static_cast<uint64_t>(0));
        break;
      default:
        llvm_unreachable("unhandled size");
      }
    }

    break;
  }
  case Value::GlobalVariableVal:
  case Value::ConstantExprVal: {
    const MCExpr *Expr = AP.lowerConstant(CV);

    // Offset that address needs to be written at is the current size of the
    // buffer.
    uint64_t CurrOffset = dataSizeInBytes();

    unsigned Size = DL.getTypeAllocSize(CV->getType());
    switch (Size) {
    case 4:
      LE.write(static_cast<uint32_t>(0));
      break;
    case 8:
      LE.write(static_cast<uint64_t>(0));
      break;
    default:
      llvm_unreachable("unhandled size");
    }

    VarInitAddresses.emplace_back(CurrOffset, Expr);
    break;
  }
  default:
    llvm_unreachable("unhandled initializer");
  }
}
Example 21
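// Estimate the cost of vectorizing the bundle VL (negative means
// profitable), recursing into the operands up to a fixed depth and falling
// back to the scalarization cost whenever the bundle cannot be vectorized
// as a unit.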
int BoUpSLP::getTreeRollCost(ValueList &VL, unsigned Depth) {
  if (Depth == 6) return max_cost;
  Type *ScalarTy = VL[0]->getType();

  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  /// Don't mess with vectors.
  if (ScalarTy->isVectorTy()) return max_cost;

  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // Must have a single use.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Need to scalarize instructions with multiple users or from other BBs.
    if (I && ((I->getNumUses() > 1) || (I->getParent() != BB)))
      return getScalarizationCost(VecTy);
  }

  // Is this a simple vector constant?
  if (AllConst) return 0;

  // If all of the operands are identical we can broadcast them.
  if (AllSameScalar)
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return getScalarizationCost(VecTy);
  assert(VL0->getParent() == BB && "Wrong BB");

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
  }

  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    int MaxIdx = InstrIdx[VL0];
    for (unsigned i = 1, e = VL.size(); i < e; ++i )
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);

    Instruction *Last = InstrVec[MaxIdx];
    for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
      if (VL[i] == Last) continue;
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
      if (Barrier) {
        DEBUG(dbgs() << "LR: Can't sink " << *VL[i] << "\n down to " <<
              *Last << "\n because of " << *Barrier << "\n");
        return max_cost;
      }
    }
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList Operands;
    int Cost = 0;
    // Calculate the cost of all of the operands.
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
      // Prepare the operand vector.
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
      Cost += getTreeRollCost(Operands, Depth+1);
      Operands.clear();
    }

    // Calculate the cost of this instruction.
    int ScalarCost = VecTy->getNumElements() *
      TTI->getArithmeticInstrCost(Opcode, ScalarTy);
    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
    Cost += (VecCost - ScalarCost);
    return Cost;
  }
  case Instruction::Load: {
    // If we have to scalarize the loads, add the cost of forming the vector.
    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
      if (!isConsecutiveAccess(VL[i], VL[i+1]))
        return getScalarizationCost(VecTy);

    // Cost of wide load - cost of scalar loads.
    int ScalarLdCost = VecTy->getNumElements() *
      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
    return VecLdCost - ScalarLdCost;
  }
  case Instruction::Store: {
    // We know that we can merge the stores. Calculate the cost.
    int ScalarStCost = VecTy->getNumElements() *
      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
    int StoreCost = VecStCost - ScalarStCost;

    ValueList Operands;
    for (unsigned j = 0; j < VL.size(); ++j) {
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      MemBarrierIgnoreList.insert(VL[j]);
    }

    int TotalCost =  StoreCost + getTreeRollCost(Operands, Depth + 1);
    MemBarrierIgnoreList.clear();
    return TotalCost;
  }
  default:
    // Unable to vectorize unknown instructions.
    return getScalarizationCost(VecTy);
  }
}
Example 22
// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
//                               <16 x i1> %mask)
// to a chain of basic blocks that store the elements one by one if
// the appropriate mask bit is set.
//
//   %1 = bitcast i8* %addr to i32*
//   %2 = extractelement <16 x i1> %mask, i32 0
//   br i1 %2, label %cond.store, label %else
//
// cond.store:                                       ; preds = %0
//   %3 = extractelement <16 x i32> %val, i32 0
//   %4 = getelementptr i32* %1, i32 0
//   store i32 %3, i32* %4
//   br label %else
//
// else:                                             ; preds = %0, %cond.store
//   %5 = extractelement <16 x i1> %mask, i32 1
//   br i1 %5, label %cond.store1, label %else2
//
// cond.store1:                                      ; preds = %else
//   %6 = extractelement <16 x i32> %val, i32 1
//   %7 = getelementptr i32* %1, i32 1
//   store i32 %6, i32* %7
//   br label %else2
//   . . .
static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = cast<VectorType>(Src->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %store i32 %OneElt, i32* %EltAddr
    //
    BasicBlock *CondBlock =
        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
    Builder.SetInsertPoint(InsertPt);

    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    Builder.CreateAlignedStore(OneElt, Gep, AlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    IfBlock = NewIfBlock;
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}
Example 23
// Translate a masked gather intrinsic like
// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
//                               <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks that load the elements one by one if
// the appropriate mask bit is set.
//
// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
// %Mask0 = extractelement <16 x i1> %Mask, i32 0
// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// %Load0 = load i32, i32* %Ptr0, align 4
// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// %Load1 = load i32, i32* %Ptr1, align 4
// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
  Value *Ptrs = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  VectorType *VecType = cast<VectorType>(CI->getType());
  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // The result vector
  Value *VResult = Src0;
  unsigned VectorWidth = VecType->getNumElements();

  // Short-cut if the mask is a vector of constants.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load =
          Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
      VResult =
          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = extractelement <16 x i1> %Mask, i32 1
    //  br i1 %Mask1, label %cond.load, label %else
    //

    Value *Predicate =
        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
    Builder.SetInsertPoint(InsertPt);

    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load =
        Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
    Value *NewVResult =
        Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
    Builder.SetInsertPoint(InsertPt);
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}
Example 24
// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
//                               <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks that load the elements one by one if
// the appropriate mask bit is set.
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.load, label %else
//
// cond.load:                                        ; preds = %0
//  %3 = getelementptr i32* %1, i32 0
//  %4 = load i32* %3
//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
//  br label %else
//
// else:                                             ; preds = %0, %cond.load
//  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
//  %6 = extractelement <16 x i1> %mask, i32 1
//  br i1 %6, label %cond.load1, label %else2
//
// cond.load1:                                       ; preds = %else
//  %7 = getelementptr i32* %1, i32 1
//  %8 = load i32* %7
//  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
//  br label %else2
//
// else2:                                          ; preds = %else, %cond.load1
//  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
//  %10 = extractelement <16 x i1> %mask, i32 2
//  br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = cast<VectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
    CI->replaceAllUsesWith(NewI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  // The result vector
  Value *VResult = Src0;

  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.load, label %else
    //

    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
                                                     "cond.load");
    Builder.SetInsertPoint(InsertPt);

    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}
Example 25
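// InstCombine's shufflevector visitor: fold fully-undef masks, canonicalize
// undef/duplicate operands onto the LHS, drop identity shuffles, rewrite a
// sub-vector extract followed by a bitcast as a bitcast plus extractelement,
// and merge shuffles of shuffles when the combined mask stays simple.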
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  Value *LHS = SVI.getOperand(0);
  Value *RHS = SVI.getOperand(1);
  SmallVector<int, 16> Mask = SVI.getShuffleMask();
  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());

  bool MadeChange = false;

  // Undefined shuffle mask -> undefined value.
  if (isa<UndefValue>(SVI.getOperand(2)))
    return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));

  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();

  APInt UndefElts(VWidth, 0);
  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
    if (V != &SVI)
      return replaceInstUsesWith(SVI, V);
    LHS = SVI.getOperand(0);
    RHS = SVI.getOperand(1);
    MadeChange = true;
  }

  unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();

  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
  if (LHS == RHS || isa<UndefValue>(LHS)) {
    if (isa<UndefValue>(LHS) && LHS == RHS) {
      // shuffle(undef,undef,mask) -> undef.
      Value *Result = (VWidth == LHSWidth)
                      ? LHS : UndefValue::get(SVI.getType());
      return replaceInstUsesWith(SVI, Result);
    }

    // Remap any references to RHS to use LHS.
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
      if (Mask[i] < 0) {
        Elts.push_back(UndefValue::get(Int32Ty));
        continue;
      }

      if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
          (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
        Mask[i] = -1;     // Turn into undef.
        Elts.push_back(UndefValue::get(Int32Ty));
      } else {
        Mask[i] = Mask[i] % e;  // Force to LHS.
        Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
      }
    }
    SVI.setOperand(0, SVI.getOperand(1));
    SVI.setOperand(1, UndefValue::get(RHS->getType()));
    SVI.setOperand(2, ConstantVector::get(Elts));
    LHS = SVI.getOperand(0);
    RHS = SVI.getOperand(1);
    MadeChange = true;
  }

  if (VWidth == LHSWidth) {
    // Analyze the shuffle: is the LHS or RHS an identity shuffle?
    bool isLHSID, isRHSID;
    recognizeIdentityMask(Mask, isLHSID, isRHSID);

    // Eliminate identity shuffles.
    if (isLHSID) return replaceInstUsesWith(SVI, LHS);
    if (isRHSID) return replaceInstUsesWith(SVI, RHS);
  }

  if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
    Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
    return replaceInstUsesWith(SVI, V);
  }

  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
  // a non-vector type. We can instead bitcast the original vector followed by
  // an extract of the desired element:
  //
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %1 = bitcast <4 x i8> %sroa to i32
  // Becomes:
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 0
  //
  // If the shuffle is extracting a contiguous range of values from the input
  // vector then each use which is a bitcast of the extracted size can be
  // replaced. This will work if the vector types are compatible, and the begin
  // index is aligned to a value in the casted vector type. If the begin index
  // isn't aligned then we can shuffle the original vector (keeping the same
  // vector type) before extracting.
  //
  // This code will bail out if the target type is fundamentally incompatible
  // with vectors of the source type.
  //
  // Example of <16 x i8>, target type i32:
  // Index range [4,8):         v-----------v Will work.
  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
  //     <4 x i32>: |           |           |           |           |
  //                +-----------+-----------+-----------+-----------+
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
  // Target type i40:           ^--------------^ Won't work, bail.
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
    Value *V = LHS;
    unsigned MaskElems = Mask.size();
    unsigned BegIdx = Mask.front();
    VectorType *SrcTy = cast<VectorType>(V->getType());
    unsigned VecBitWidth = SrcTy->getBitWidth();
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
    unsigned SrcNumElems = SrcTy->getNumElements();
    SmallVector<BitCastInst *, 8> BCs;
    DenseMap<Type *, Value *> NewBCs;
    for (User *U : SVI.users())
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
        if (!BC->use_empty())
          // Only visit bitcasts that weren't previously handled.
          BCs.push_back(BC);
    for (BitCastInst *BC : BCs) {
      Type *TgtTy = BC->getDestTy();
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
      if (!TgtElemBitWidth)
        continue;
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
      if (!VecBitWidthsEqual)
        continue;
      if (!VectorType::isValidElementType(TgtTy))
        continue;
      VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
      if (!BegIsAligned) {
        // Shuffle the input so [0,MaskElems) contains the output, and
        // [MaskElems,SrcNumElems) is undef.
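        // E.g. for the [6,10) extraction from <16 x i8> sketched above:
        // MaskElems == 4 and BegIdx == 6, so this builds the mask
        // <6, 7, 8, 9, undef, undef, ...>, after which BegIdx is reset to 0.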
        SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
                                                UndefValue::get(Int32Ty));
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
          ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
        V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
                                         ConstantVector::get(ShuffleMask),
                                         SVI.getName() + ".extract");
        BegIdx = 0;
      }
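      // Convert BegIdx from source-element units to target-element units.
      // E.g. extracting [4,8) of a <16 x i8> as i32: 32/8 == 4 source
      // elements per target element, so BegIdx 4 becomes element 1 of the
      // <4 x i32> bitcast.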
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
      assert(SrcElemsPerTgtElem);
      BegIdx /= SrcElemsPerTgtElem;
      bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
      auto *NewBC =
          BCAlreadyExists
              ? NewBCs[CastSrcTy]
              : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
      if (!BCAlreadyExists)
        NewBCs[CastSrcTy] = NewBC;
      auto *Ext = Builder->CreateExtractElement(
          NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
      // The shufflevector isn't being replaced: the bitcast that used it
      // is. InstCombine will visit the newly-created instructions.
      replaceInstUsesWith(*BC, Ext);
      MadeChange = true;
    }
  }

  // If the LHS is a shufflevector itself, see if we can combine it with this
  // one without producing an unusual shuffle.
  // Cases that might be simplified:
  // 1.
  // x1=shuffle(v1,v2,mask1)
  //  x=shuffle(x1,undef,mask)
  //        ==>
  //  x=shuffle(v1,undef,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  // 2.
  // x1=shuffle(v1,undef,mask1)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == mask1.size()
  //        ==>
  //  x=shuffle(v1,x2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  // 3.
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v2.size() == mask2.size()
  //        ==>
  //  x=shuffle(x1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  // 4.
  // x1=shuffle(v1,undef,mask1)
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == v2.size()
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  //
  // Here we are very conservative: we never introduce a shuffle mask that
  // did not already appear in the input program, because codegen may not be
  // smart enough to split a merged shuffle back into two cheap shuffles and
  // could produce worse code.  As such, we only merge two shuffles if the
  // result is either a splat or one of the input shuffle masks.  In that
  // case, merging the shuffles just removes one instruction, which we know
  // is safe.  This is good for things like
  // turning: (splat(splat)) -> splat, or
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
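  //
  // For example (illustrative IR), case 1 merges the splat-of-splat
  //   %x1 = shufflevector <4 x i32> %v1, <4 x i32> undef,
  //                       <4 x i32> zeroinitializer
  //   %x  = shufflevector <4 x i32> %x1, <4 x i32> undef,
  //                       <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  // into
  //   %x  = shufflevector <4 x i32> %v1, <4 x i32> undef,
  //                       <4 x i32> zeroinitializer
  // because the merged mask is a splat of element 0 of %v1.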
  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
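  // Only look through an operand that is itself a shuffle when merging
  // cannot require more than two live inputs: the LHS shuffle may keep both
  // of its operands only if RHS is undef (case 1), and the RHS shuffle must
  // have an undef second operand (cases 3 and 4).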
  if (LHSShuffle)
    if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
      LHSShuffle = nullptr;
  if (RHSShuffle)
    if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
      RHSShuffle = nullptr;
  if (!LHSShuffle && !RHSShuffle)
    return MadeChange ? &SVI : nullptr;

  Value* LHSOp0 = nullptr;
  Value* LHSOp1 = nullptr;
  Value* RHSOp0 = nullptr;
  unsigned LHSOp0Width = 0;
  unsigned RHSOp0Width = 0;
  if (LHSShuffle) {
    LHSOp0 = LHSShuffle->getOperand(0);
    LHSOp1 = LHSShuffle->getOperand(1);
    LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
  }
  if (RHSShuffle) {
    RHSOp0 = RHSShuffle->getOperand(0);
    RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
  }
  Value* newLHS = LHS;
  Value* newRHS = RHS;
  if (LHSShuffle) {
    // case 1
    if (isa<UndefValue>(RHS)) {
      newLHS = LHSOp0;
      newRHS = LHSOp1;
    }
    // case 2 or 4
    else if (LHSOp0Width == LHSWidth) {
      newLHS = LHSOp0;
    }
  }
  // case 3 or 4
  if (RHSShuffle && RHSOp0Width == LHSWidth) {
    newRHS = RHSOp0;
  }
  // case 4
  if (LHSOp0 == RHSOp0) {
    newLHS = LHSOp0;
    newRHS = nullptr;
  }

  if (newLHS == LHS && newRHS == RHS)
    return MadeChange ? &SVI : nullptr;

  SmallVector<int, 16> LHSMask;
  SmallVector<int, 16> RHSMask;
  if (newLHS != LHS)
    LHSMask = LHSShuffle->getShuffleMask();
  if (RHSShuffle && newRHS != RHS)
    RHSMask = RHSShuffle->getShuffleMask();

  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  SmallVector<int, 16> newMask;
  bool isSplat = true;
  int SplatElt = -1;
  // Create a new mask for the new ShuffleVectorInst so that the new
  // ShuffleVectorInst is equivalent to the original one.
  for (unsigned i = 0; i < VWidth; ++i) {
    int eltMask;
    if (Mask[i] < 0) {
      // This element is an undef value.
      eltMask = -1;
    } else if (Mask[i] < (int)LHSWidth) {
      // This element is from left hand side vector operand.
      //
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
      // new mask value for the element.
      if (newLHS != LHS) {
        eltMask = LHSMask[Mask[i]];
        // If the value selected is an undef value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
          eltMask = -1;
      } else
        eltMask = Mask[i];
    } else {
      // This element is from right hand side vector operand
      //
      // If the value selected is an undef value, explicitly specify it
      // with a -1 mask value. (case 1)
      if (isa<UndefValue>(RHS))
        eltMask = -1;
      // If RHS is going to be replaced (case 3 or 4), calculate the
      // new mask value for the element.
      else if (newRHS != RHS) {
        eltMask = RHSMask[Mask[i]-LHSWidth];
        // If the value selected is an undef value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)RHSOp0Width) {
          assert(isa<UndefValue>(RHSShuffle->getOperand(1)) &&
                 "should have been checked above");
          eltMask = -1;
        }
      } else
        eltMask = Mask[i]-LHSWidth;

      // If LHS's width is changed, shift the mask value accordingly.
      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
      // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
      // obfuscation across the two vectors.
      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
        eltMask += newLHSWidth;
    }

    // Check if this could still be a splat.
    if (eltMask >= 0) {
      if (SplatElt >= 0 && SplatElt != eltMask)
        isSplat = false;
      SplatElt = eltMask;
    }

    newMask.push_back(eltMask);
  }

  // If the result mask is equal to one of the original shuffle masks,
  // or is a splat, do the replacement.
  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
      if (newMask[i] < 0) {
        Elts.push_back(UndefValue::get(Int32Ty));
      } else {
        Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
      }
    }
    if (!newRHS)
      newRHS = UndefValue::get(newLHS->getType());
    return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
  }

  // If the result mask is an identity, replace uses of this instruction with
  // corresponding argument.
  bool isLHSID, isRHSID;
  recognizeIdentityMask(newMask, isLHSID, isRHSID);
  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);

  return MadeChange ? &SVI : nullptr;
}