// Scalarize a vector-to-vector bitcast by rewriting it in terms of the
// scattered per-element values.  Three shapes are handled:
//   * same element count: element-wise bitcasts,
//   * fewer src elements:  each src element fans out to several dst elements,
//   * more src elements:   groups of src elements are packed into one dst
//                          element via a temporary mid-sized vector.
// Returns true if the instruction was scalarized (results recorded via
// gather()), false if either side is not a vector type.
bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!DstVT || !SrcVT)
    return false;

  unsigned DstNumElems = DstVT->getNumElements();
  unsigned SrcNumElems = SrcVT->getNumElements();
  IRBuilder<> Builder(BCI.getParent(), &BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    // 1:1 mapping — just bitcast each element to the new element type.
    for (unsigned I = 0; I < DstNumElems; ++I)
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
                                     BCI.getName() + ".i" + Twine(I));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
    // individual elements to the destination.
    unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    unsigned ResI = 0;
    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
      Value *V = Op0[Op0I];
      Instruction *VI;
      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
      // Scatter the mid vector so its lanes can be copied to the result.
      Scatterer Mid = scatter(&BCI, V);
      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
        Res[ResI++] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
    unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    unsigned Op0I = 0;
    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
      // Rebuild a small <N x t1> vector from the group's scalar pieces,
      // then bitcast the whole group to a single t2.
      Value *V = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
                                        BCI.getName() + ".i" + Twine(ResI)
                                        + ".upto" + Twine(MidI));
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
                                        BCI.getName() + ".i" + Twine(ResI));
    }
  }
  gather(&BCI, Res);
  return true;
}
/// \brief Given a vector and an element number, see if the scalar value is /// already around as a register, for example if it were inserted then extracted /// from the vector. llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); VectorType *VTy = cast<VectorType>(V->getType()); unsigned Width = VTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(VTy->getElementType()); if (Constant *C = dyn_cast<Constant>(V)) return C->getAggregateElement(EltNo); if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. if (!isa<ConstantInt>(III->getOperand(2))) return nullptr; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the // inserted value. if (EltNo == IIElt) return III->getOperand(1); // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return findScalarElement(III->getOperand(0), EltNo); } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) return UndefValue::get(VTy->getElementType()); if (InEl < (int)LHSWidth) return findScalarElement(SVI->getOperand(0), InEl); return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); } // Extract a value from a vector add operation with a constant zero. Value *Val = nullptr; Constant *Con = nullptr; if (match(V, llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val), llvm::PatternMatch::m_Constant(Con)))) { if (Con->getAggregateElement(EltNo)->isNullValue()) return findScalarElement(Val, EltNo); } // Otherwise, we don't know. return nullptr; }
/// Produce the OpenCL builtin type name for \p Ty.  Floating-point types map
/// to their OpenCL spellings; integers use char/short/int/long (with a 'u'
/// prefix when \p Signed is false, and "iN" for non-standard widths); vectors
/// append the element count to the element type name.  Unknown types yield
/// "unknown".
static std::string getOCLTypeName(Type *Ty, bool Signed) {
  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return "half";
  case Type::FloatTyID:
    return "float";
  case Type::DoubleTyID:
    return "double";
  case Type::IntegerTyID: {
    // Unsigned integers are the signed spelling with a 'u' prefix.
    if (!Signed)
      return (Twine('u') + getOCLTypeName(Ty, true)).str();
    unsigned Bits = Ty->getIntegerBitWidth();
    if (Bits == 8)
      return "char";
    if (Bits == 16)
      return "short";
    if (Bits == 32)
      return "int";
    if (Bits == 64)
      return "long";
    // Non-standard widths are spelled "iN".
    return (Twine('i') + Twine(Bits)).str();
  }
  case Type::VectorTyID: {
    // Element type name followed by the lane count, e.g. "float4".
    VectorType *VecTy = cast<VectorType>(Ty);
    unsigned NumElts = VecTy->getVectorNumElements();
    return (Twine(getOCLTypeName(VecTy->getElementType(), Signed)) +
            Twine(NumElts)).str();
  }
  default:
    return "unknown";
  }
}
/// FindScalarElement - Given a vector and an element number, see if the scalar /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); if (isa<UndefValue>(V)) return UndefValue::get(PTy->getElementType()); if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(PTy->getElementType()); if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. if (!isa<ConstantInt>(III->getOperand(2))) return 0; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the // inserted value. if (EltNo == IIElt) return III->getOperand(1); // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return FindScalarElement(III->getOperand(0), EltNo); } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) return UndefValue::get(PTy->getElementType()); if (InEl < (int)LHSWidth) return FindScalarElement(SVI->getOperand(0), InEl); return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } // Otherwise, we don't know. return 0; }
/// getEVT - Return the value type corresponding to the specified type. This /// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types /// are returned as Other, otherwise they are invalid. EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: return MVT::getVT(Ty, HandleUnknown); case Type::IntegerTyID: return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth()); case Type::VectorTyID: { VectorType *VTy = cast<VectorType>(Ty); return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), VTy->getNumElements()); } } }
// isByteSwap64 - Recognize a shufflevector mask that byte-reverses each
// 8-byte (i64-sized) group of an <N x i8> vector.  On success, RefMasks is
// filled with, for each destination group, the index of the source 8-byte
// group its bytes came from; on failure RefMasks is cleared and false is
// returned.
//
// NOTE(review): mask entries can be negative (undef lanes) and are mixed
// with unsigned arithmetic below; confirm undef lanes are rejected as
// intended.  The TODO below also records that constant-mask checking is
// still missing.
static bool isByteSwap64(ShuffleVectorInst &SI, SmallVector<int, 16>&RefMasks) {
  RefMasks.clear();
  unsigned VWidth = cast<VectorType>(SI.getType())->getNumElements();
  VectorType *LHS = cast<VectorType>(SI.getOperand(0)->getType());
  // RHS is fetched for symmetry but is not otherwise inspected here.
  VectorType *RHS = cast<VectorType>(SI.getOperand(1)->getType());
  IntegerType *IT = dyn_cast<IntegerType>(LHS->getElementType());
  //When Element Type is not IntegerType or the Result's element number
  //can't be divided by 8, return false
  //TODO:Need to check all masks are all constants.
  if (IT == nullptr || ! IT->isIntegerTy(8) || VWidth % 8 != 0) {
    return false;
  }
  SmallVector<int, 16> Masks(SI.getShuffleMask());
  bool isByteSwap = true;
  // Each group of 8 mask entries must be the strictly descending run
  // base, base-1, ..., base-7, where base is the last byte (index 7 mod 8)
  // of some source group.
  for (unsigned i = 0; i < VWidth / 8; ++i) {
    unsigned base = Masks[i * 8];
    if (base % 8 != 7) {
      isByteSwap = false;
      break;
    }
    for (unsigned j = 1; j < 8; ++j) {
      if (base - Masks[i * 8 + j] != j) {
        isByteSwap = false;
        break;
      }
    }
    if (isByteSwap) {
      // Record which source group feeds this destination group.
      RefMasks.push_back(base / 8);
    } else {
      break;
    }
  }
  if (!isByteSwap) {
    RefMasks.clear();
  }
  return isByteSwap;
}
/// Scalarize a cast whose result is a vector: emit one scalar cast of the
/// same opcode per lane and gather the pieces back together.  Returns false
/// (no change) when the destination type is not a vector.
bool Scalarizer::visitCastInst(CastInst &CI) {
  VectorType *DestVecTy = dyn_cast<VectorType>(CI.getDestTy());
  if (!DestVecTy)
    return false;

  unsigned Lanes = DestVecTy->getNumElements();
  IRBuilder<> Builder(CI.getParent(), &CI);
  Scatterer SrcParts = scatter(&CI, CI.getOperand(0));
  assert(SrcParts.size() == Lanes && "Mismatched cast");

  ValueVector Pieces;
  Pieces.resize(Lanes);
  for (unsigned Lane = 0; Lane != Lanes; ++Lane)
    Pieces[Lane] = Builder.CreateCast(CI.getOpcode(), SrcParts[Lane],
                                      DestVecTy->getElementType(),
                                      CI.getName() + ".i" + Twine(Lane));
  gather(&CI, Pieces);
  return true;
}
// Return true if V is known to be zero, or may be zero.  Scalars use
// known-bits analysis (all bits known zero); for vectors, this returns true
// when ANY lane is undef or known zero.
// NOTE(review): the any-lane semantics suggest this is a "may this value be
// zero" query (e.g. a divisor check) — confirm against call sites.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                   AssumptionCache *AC) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    // Scalar integer: zero iff every bit is known to be zero.
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
                     dyn_cast<Instruction>(V), DT);
    return KnownZero.isAllOnesValue();
  }

  // Per-component check doesn't work with zeroinitializer
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component
  unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    // An undef lane could be zero.
    if (isa<UndefValue>(Elem))
      return true;

    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(Elem, KnownZero, KnownOne, DL);
    if (KnownZero.isAllOnesValue())
      return true;
  }
  return false;
}
/// Return the cost of a vector shuffle.  On subtargets with packed (VOP3P)
/// instructions, op_sel lets any instruction read either the low or high half
/// of a 32-bit register, so single-source swizzles of a 2 x 16-bit vector are
/// free; everything else defers to the base implementation.
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  if (ST->hasVOP3PInsts()) {
    VectorType *VecTy = cast<VectorType>(Tp);
    bool IsPacked16 = VecTy->getNumElements() == 2 &&
                      DL.getTypeSizeInBits(VecTy->getElementType()) == 16;
    if (IsPacked16 &&
        (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Reverse ||
         Kind == TTI::SK_PermuteSingleSrc))
      return 0;
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
// Lower insertelement/extractelement instructions whose index is not a
// compile-time constant by spilling the vector to a stack slot and addressing
// the element through a GEP: store the whole vector, load/store the single
// element at the dynamic index, and (for inserts) reload the updated vector
// as the result.  Each lowered instruction is then erased.
void ConstantInsertExtractElementIndex::fixNonConstantVectorIndices(
    BasicBlock &BB, const Instructions &Instrs) const {
  for (Instructions::const_iterator IB = Instrs.begin(), IE = Instrs.end();
       IB != IE; ++IB) {
    Instruction *I = *IB;
    Value *Vec = I->getOperand(0);
    Value *Idx = getInsertExtractElementIdx(I);
    VectorType *VecTy = cast<VectorType>(Vec->getType());
    Type *ElemTy = VecTy->getElementType();
    unsigned ElemAlign = DL->getPrefTypeAlignment(ElemTy);
    // The slot is accessed both as a whole vector and element-wise, so it
    // must satisfy the stricter of the two alignments.
    unsigned VecAlign = std::max(ElemAlign, DL->getPrefTypeAlignment(VecTy));

    IRBuilder<> IRB(I);
    // Allocate one element per lane; the bitcast below gives a vector-typed
    // view of the same memory for the whole-vector store/load.
    AllocaInst *Alloca = IRB.CreateAlloca(
        ElemTy, ConstantInt::get(Type::getInt32Ty(M->getContext()),
                                 vectorNumElements(I)));
    Alloca->setAlignment(VecAlign);
    Value *AllocaAsVec = IRB.CreateBitCast(Alloca, VecTy->getPointerTo());
    IRB.CreateAlignedStore(Vec, AllocaAsVec, Alloca->getAlignment());
    // Address of the dynamically-indexed element.
    Value *GEP = IRB.CreateGEP(Alloca, Idx);

    Value *Res;
    switch (I->getOpcode()) {
    default:
      llvm_unreachable("expected InsertElement or ExtractElement");
    case Instruction::InsertElement:
      // Overwrite the element in memory, then reload the updated vector.
      IRB.CreateAlignedStore(I->getOperand(1), GEP, ElemAlign);
      Res = IRB.CreateAlignedLoad(AllocaAsVec, Alloca->getAlignment());
      break;
    case Instruction::ExtractElement:
      Res = IRB.CreateAlignedLoad(GEP, ElemAlign);
      break;
    }
    I->replaceAllUsesWith(Res);
    I->eraseFromParent();
  }
}
/// Scalarize a shufflevector by resolving each mask entry directly against
/// the scattered elements of the two source operands: an undef mask lane
/// becomes an undef scalar, otherwise the selected lane is copied from
/// whichever operand it indexes into.
bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  VectorType *VecTy = dyn_cast<VectorType>(SVI.getType());
  if (!VecTy)
    return false;

  unsigned Lanes = VecTy->getNumElements();
  Scatterer LHS = scatter(&SVI, SVI.getOperand(0));
  Scatterer RHS = scatter(&SVI, SVI.getOperand(1));
  ValueVector Out;
  Out.resize(Lanes);

  for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
    int Sel = SVI.getMaskValue(Lane);
    if (Sel < 0) {
      // Undef mask entry.
      Out[Lane] = UndefValue::get(VecTy->getElementType());
    } else if (unsigned(Sel) < LHS.size()) {
      Out[Lane] = LHS[Sel];
    } else {
      // Indices past the first operand select from the second.
      Out[Lane] = RHS[Sel - LHS.size()];
    }
  }
  gather(&SVI, Out);
  return true;
}
/// Scalarize a vector PHI: create one scalar PHI per lane, then route each
/// incoming vector's scattered pieces into the matching lane PHI for the
/// corresponding predecessor block.
bool Scalarizer::visitPHINode(PHINode &PHI) {
  VectorType *VecTy = dyn_cast<VectorType>(PHI.getType());
  if (!VecTy)
    return false;

  unsigned Lanes = VecTy->getNumElements();
  unsigned NumIncoming = PHI.getNumOperands();
  IRBuilder<> Builder(PHI.getParent(), &PHI);

  // One scalar PHI per lane, created up front so incoming values can be
  // appended below.
  ValueVector LanePhis;
  LanePhis.resize(Lanes);
  for (unsigned Lane = 0; Lane != Lanes; ++Lane)
    LanePhis[Lane] = Builder.CreatePHI(VecTy->getElementType(), NumIncoming,
                                       PHI.getName() + ".i" + Twine(Lane));

  // Wire each incoming (block, vector) pair into every lane PHI.
  for (unsigned In = 0; In != NumIncoming; ++In) {
    Scatterer Pieces = scatter(&PHI, PHI.getIncomingValue(In));
    BasicBlock *Pred = PHI.getIncomingBlock(In);
    for (unsigned Lane = 0; Lane != Lanes; ++Lane)
      cast<PHINode>(LanePhis[Lane])->addIncoming(Pieces[Lane], Pred);
  }
  gather(&PHI, LanePhis);
  return true;
}
/// Return the value type corresponding to the specified type. This returns all /// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned /// as Other, otherwise they are invalid. MVT MVT::getVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: if (HandleUnknown) return MVT(MVT::Other); llvm_unreachable("Unknown type!"); case Type::VoidTyID: return MVT::isVoid; case Type::IntegerTyID: return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth()); case Type::HalfTyID: return MVT(MVT::f16); case Type::FloatTyID: return MVT(MVT::f32); case Type::DoubleTyID: return MVT(MVT::f64); case Type::X86_FP80TyID: return MVT(MVT::f80); case Type::X86_MMXTyID: return MVT(MVT::x86mmx); case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); case Type::PointerTyID: return MVT(MVT::iPTR); case Type::VectorTyID: { VectorType *VTy = cast<VectorType>(Ty); return getVectorVT( getVT(VTy->getElementType(), false), VTy->getNumElements()); } } }
/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
///
/// \param InsElt the insertelement whose destination vector is the wider one.
/// \param ExtElt the extractelement feeding the insert.
/// \param IC     the InstCombiner used to insert/replace instructions.
static void replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombiner &IC) {
  VectorType *InsVecType = InsElt->getType();
  VectorType *ExtVecType = ExtElt->getVectorOperandType();
  unsigned NumInsElts = InsVecType->getVectorNumElements();
  unsigned NumExtElts = ExtVecType->getVectorNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return;

  // Create a shuffle mask to widen the extended-from vector using undefined
  // values. The mask selects all of the values of the original vector followed
  // by as many undefined values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<Constant *, 16> ExtendMask;
  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(ConstantInt::get(IntType, i));
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(UndefValue::get(IntType));

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  // Decide where the widening shuffle will live: right after the definition
  // of the extract's vector operand when possible, else the extract's block.
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
                                        ConstantVector::get(ExtendMask));

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the new
  // wide vector.
  for (User *U : ExtVecOp->users()) {
    // Only rewrite extracts in the same block as the widening shuffle so the
    // new extract is dominated by its operand.
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    NewExt->insertAfter(WideVec);
    IC.replaceInstUsesWith(*OldExt, NewExt);
  }
}
/// WriteTypeTable - Write out the type table for a module.
///
/// Emits a TYPE_BLOCK_ID_NEW subblock: first an abbreviation for function
/// types and an entry-count record (so the reader can reserve space), then
/// one record per type in the enumerator's order.  Struct and array types are
/// rejected: they are not representable in PNaCl bitcode.
static void WriteTypeTable(const NaClValueEnumerator &VE,
                           NaClBitstreamWriter &Stream) {
  DEBUG(dbgs() << "-> WriteTypeTable\n");
  const NaClValueEnumerator::TypeList &TypeList = VE.getTypes();

  Stream.EnterSubblock(naclbitc::TYPE_BLOCK_ID_NEW, TYPE_MAX_ABBREV);

  SmallVector<uint64_t, 64> TypeVals;

  // Abbrev for TYPE_CODE_FUNCTION.
  NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
  Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION));
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1));  // isvararg
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
  // The abbrev id must match the compile-time constant used below.
  if (TYPE_FUNCTION_ABBREV != Stream.EmitAbbrev(Abbv))
    llvm_unreachable("Unexpected abbrev ordering!");

  // Emit an entry count so the reader can reserve space.
  TypeVals.push_back(TypeList.size());
  Stream.EmitRecord(naclbitc::TYPE_CODE_NUMENTRY, TypeVals);
  TypeVals.clear();

  // Loop over all of the types, emitting each in turn.
  for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
    Type *T = TypeList[i];
    int AbbrevToUse = 0;
    unsigned Code = 0;

    switch (T->getTypeID()) {
    default: llvm_unreachable("Unknown type!");
    case Type::VoidTyID:   Code = naclbitc::TYPE_CODE_VOID;   break;
    case Type::FloatTyID:  Code = naclbitc::TYPE_CODE_FLOAT;  break;
    case Type::DoubleTyID: Code = naclbitc::TYPE_CODE_DOUBLE; break;
    case Type::IntegerTyID:
      // INTEGER: [width]
      Code = naclbitc::TYPE_CODE_INTEGER;
      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
      break;
    case Type::VectorTyID: {
      VectorType *VT = cast<VectorType>(T);
      // VECTOR [numelts, eltty]
      Code = naclbitc::TYPE_CODE_VECTOR;
      TypeVals.push_back(VT->getNumElements());
      TypeVals.push_back(VE.getTypeID(VT->getElementType()));
      break;
    }
    case Type::FunctionTyID: {
      FunctionType *FT = cast<FunctionType>(T);
      // FUNCTION: [isvararg, retty, paramty x N]
      Code = naclbitc::TYPE_CODE_FUNCTION;
      TypeVals.push_back(FT->isVarArg());
      TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
      for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
        TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
      AbbrevToUse = TYPE_FUNCTION_ABBREV;
      break;
    }
    case Type::StructTyID:
      report_fatal_error("Struct types are not supported in PNaCl bitcode");
    case Type::ArrayTyID:
      report_fatal_error("Array types are not supported in PNaCl bitcode");
    }

    // Emit the finished record.
    Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
    TypeVals.clear();
  }
  Stream.ExitBlock();
  DEBUG(dbgs() << "<- WriteTypeTable\n");
}
// Serialize a constant initializer into the store buffer, little-endian,
// matching the DataLayout's in-memory layout (struct padding, vector tail
// padding).  Aggregates recurse; leaves write raw bytes via LE.  Relocatable
// values (globals / constexprs) write a zero placeholder and record the
// offset + MCExpr in VarInitAddresses for later patching.
void StoreInitializer::append(const Constant *CV, StringRef Var) {
  switch (CV->getValueID()) {
  case Value::ConstantArrayVal: { // Recursive type.
    const ConstantArray *CA = cast<ConstantArray>(CV);
    for (unsigned I = 0, E = CA->getNumOperands(); I < E; ++I)
      append(cast<Constant>(CA->getOperand(I)), Var);
    break;
  }
  case Value::ConstantDataArrayVal: {
    const ConstantDataArray *CVE = cast<ConstantDataArray>(CV);
    for (unsigned I = 0, E = CVE->getNumElements(); I < E; ++I)
      append(cast<Constant>(CVE->getElementAsConstant(I)), Var);
    break;
  }
  case Value::ConstantStructVal: { // Recursive type.
    const ConstantStruct *S = cast<ConstantStruct>(CV);
    StructType *ST = S->getType();
    const StructLayout *SL = DL.getStructLayout(ST);
    uint64_t StructSize = DL.getTypeAllocSize(ST);
    uint64_t BaseOffset = SL->getElementOffset(0);
    for (unsigned I = 0, E = S->getNumOperands(); I < E; ++I) {
      Constant *Elt = cast<Constant>(S->getOperand(I));
      append(Elt, Var);
      uint64_t EltSize = DL.getTypeAllocSize(Elt->getType());
      uint64_t EltOffset = SL->getElementOffset(I);
      // Pad each element out to the start of the next one (or to the end of
      // the struct, for the last element).
      uint64_t PaddedEltSize;
      if (I == E - 1)
        PaddedEltSize = BaseOffset + StructSize - EltOffset;
      else
        PaddedEltSize = SL->getElementOffset(I + 1) - EltOffset;
      // Match structure layout by padding with zeroes.
      while (EltSize < PaddedEltSize) {
        LE.write(static_cast<uint8_t>(0));
        ++EltSize;
      }
    }
    break;
  }
  case Value::ConstantVectorVal: { // Almost leaf type.
    const ConstantVector *CVE = cast<ConstantVector>(CV);
    VectorType *Ty = CVE->getType();
    Type *EltTy = Ty->getElementType();
    unsigned NElts = Ty->getNumElements();
    // The alloc size of the vector may exceed NElts * element size (e.g.
    // <3 x T> padded to 4); fill the tail with zero elements.
    unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);
    unsigned I;
    for (I = 0; I < NElts; ++I)
      append(cast<Constant>(CVE->getOperand(I)), Var);
    Constant *Zero = Constant::getNullValue(EltTy);
    while (I < RealNElts) {
      append(Zero, Var);
      ++I;
    }
    break;
  }
  case Value::ConstantDataVectorVal: {
    const ConstantDataVector *CVE = cast<ConstantDataVector>(CV);
    VectorType *Ty = CVE->getType();
    Type *EltTy = Ty->getElementType();
    unsigned NElts = Ty->getNumElements();
    // Same tail-padding handling as the ConstantVector case above.
    unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);
    unsigned I;
    for (I = 0; I < NElts; ++I)
      append(cast<Constant>(CVE->getElementAsConstant(I)), Var);
    Constant *Zero = Constant::getNullValue(EltTy);
    while (I < RealNElts) {
      append(Zero, Var);
      ++I;
    }
    break;
  }
  case Value::ConstantIntVal: {
    const ConstantInt *CI = cast<ConstantInt>(CV);
    // i1 is stored as a single byte.
    if (CI->getType()->isIntegerTy(1)) {
      LE.write(static_cast<uint8_t>(CI->getZExtValue() ? 1 : 0));
    } else {
      // NOTE(review): widths other than 8/16/32/64 fall through this switch
      // silently, writing nothing — confirm such widths cannot reach here.
      switch (CI->getBitWidth()) {
      case 8:
        LE.write(static_cast<uint8_t>(CI->getZExtValue()));
        break;
      case 16:
        LE.write(static_cast<uint16_t>(CI->getZExtValue()));
        break;
      case 32:
        LE.write(static_cast<uint32_t>(CI->getZExtValue()));
        break;
      case 64:
        LE.write(static_cast<uint64_t>(CI->getZExtValue()));
        break;
      }
    }
    break;
  }
  case Value::ConstantFPVal: {
    const ConstantFP *CFP = cast<ConstantFP>(CV);
    if (CFP->getType()->isFloatTy())
      LE.write(CFP->getValueAPF().convertToFloat());
    else if (CFP->getType()->isDoubleTy())
      LE.write(CFP->getValueAPF().convertToDouble());
    else
      llvm_unreachable("unhandled ConstantFP type");
    break;
  }
  case Value::ConstantPointerNullVal: {
    // Null pointer width depends on the address space's pointer size.
    unsigned AS = CV->getType()->getPointerAddressSpace();
    if (DL.getPointerSize(AS) == 8)
      LE.write(static_cast<uint64_t>(0));
    else
      LE.write(static_cast<uint32_t>(0));
    break;
  }
  case Value::UndefValueVal:
  case Value::ConstantAggregateZeroVal: {
    // Undef and zeroinitializer both serialize as all-zero bytes, written
    // in InitEltSize-sized chunks.
    uint64_t Size = DL.getTypeAllocSize(CV->getType());
    for (uint64_t I = 0; I < Size / InitEltSize; ++I) {
      switch (InitEltSize) {
      case 1:
        LE.write(static_cast<uint8_t>(0));
        break;
      case 2:
        LE.write(static_cast<uint16_t>(0));
        break;
      case 4:
        LE.write(static_cast<uint32_t>(0));
        break;
      case 8:
        LE.write(static_cast<uint64_t>(0));
        break;
      default:
        llvm_unreachable("unhandled size");
      }
    }
    break;
  }
  case Value::GlobalVariableVal:
  case Value::ConstantExprVal: {
    const MCExpr *Expr = AP.lowerConstant(CV);
    // Offset that address needs to be written at is the current size of the
    // buffer.
    uint64_t CurrOffset = dataSizeInBytes();
    // Write a zero placeholder; the real address is patched in later from
    // VarInitAddresses.
    unsigned Size = DL.getTypeAllocSize(CV->getType());
    switch (Size) {
    case 4:
      LE.write(static_cast<uint32_t>(0));
      break;
    case 8:
      LE.write(static_cast<uint64_t>(0));
      break;
    default:
      llvm_unreachable("unhandled size");
    }
    VarInitAddresses.emplace_back(CurrOffset, Expr);
    break;
  }
  default:
    llvm_unreachable("unhandled initializer");
  }
}
// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
//                               <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.load, label %else
//
// cond.load:                                        ; preds = %0
//  %3 = getelementptr i32* %1, i32 0
//  %4 = load i32* %3
//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
//  br label %else
//
// else:                                             ; preds = %0, %cond.load
//  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
//  %6 = extractelement <16 x i1> %mask, i32 1
//  br i1 %6, label %cond.load1, label %else2
//
// cond.load1:                                       ; preds = %else
//  %7 = getelementptr i32* %1, i32 1
//  %8 = load i32* %7
//  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
//  br label %else2
//
// else2:                                          ; preds = %else, %cond.load1
//  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
//  %10 = extractelement <16 x i1> %mask, i32 2
//  br i1 %10, label %cond.load4, label %else5
//
// Fast paths: an all-true mask becomes a single vector load; a constant mask
// becomes straight-line per-element loads with no branching.  Otherwise a
// branch chain is built and ModifiedDT is set to signal the CFG change.
static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = cast<VectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
    CI->replaceAllUsesWith(NewI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  // The result vector
  Value *VResult = Src0;

  // Constant mask: no control flow needed — load exactly the enabled lanes.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      // Disabled lanes keep the passthru value.
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // Variable mask: build one conditional block per lane.
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    BasicBlock *CondBlock =
        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
    Builder.SetInsertPoint(InsertPt);

    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);
    // Replace the unconditional branch left by the split with the masked
    // conditional branch.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}
// Simplify and canonicalize a shufflevector instruction. The transforms
// applied, in order:
//   1. undef mask          -> undef result
//   2. demanded-elements simplification
//   3. shuffle(x,x,m) / shuffle(undef,x,m) -> shuffle(x,undef,m')
//   4. identity shuffles   -> replaced by the corresponding operand
//   5. evaluate the shuffle in a different element order when profitable
//   6. shuffle+bitcast extraction of a contiguous sub-vector -> bitcast+extract
//   7. conservative merging of chained shuffles (splat / reused-mask only)
// Returns the (possibly new) instruction for the worklist, or nullptr if no
// change was made.
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  Value *LHS = SVI.getOperand(0);
  Value *RHS = SVI.getOperand(1);
  SmallVector<int, 16> Mask = SVI.getShuffleMask();
  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
  bool MadeChange = false;

  // Undefined shuffle mask -> undefined value.
  if (isa<UndefValue>(SVI.getOperand(2)))
    return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));

  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();

  // Try to drop elements of the result nobody reads; if that rewrote the
  // shuffle in place, refresh the cached operands before continuing.
  APInt UndefElts(VWidth, 0);
  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
    if (V != &SVI)
      return replaceInstUsesWith(SVI, V);
    LHS = SVI.getOperand(0);
    RHS = SVI.getOperand(1);
    MadeChange = true;
  }

  unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();

  // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
  if (LHS == RHS || isa<UndefValue>(LHS)) {
    if (isa<UndefValue>(LHS) && LHS == RHS) {
      // shuffle(undef,undef,mask) -> undef.
      Value *Result = (VWidth == LHSWidth)
                          ? LHS
                          : UndefValue::get(SVI.getType());
      return replaceInstUsesWith(SVI, Result);
    }

    // Remap any references to RHS to use LHS.
    SmallVector<Constant*, 16> Elts;
    // NOTE: `e` is the operand width while the loop bound is the result
    // width VWidth; mask indices are compared/folded against `e` below.
    for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
      if (Mask[i] < 0) {
        Elts.push_back(UndefValue::get(Int32Ty));
        continue;
      }

      if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
          (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
        Mask[i] = -1;     // Turn into undef.
        Elts.push_back(UndefValue::get(Int32Ty));
      } else {
        Mask[i] = Mask[i] % e;  // Force to LHS.
        Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
      }
    }
    // Rewrite the shuffle in place: move the defined vector to operand 0 and
    // replace operand 1 with undef; then refresh the cached operands.
    SVI.setOperand(0, SVI.getOperand(1));
    SVI.setOperand(1, UndefValue::get(RHS->getType()));
    SVI.setOperand(2, ConstantVector::get(Elts));
    LHS = SVI.getOperand(0);
    RHS = SVI.getOperand(1);
    MadeChange = true;
  }

  if (VWidth == LHSWidth) {
    // Analyze the shuffle, are the LHS or RHS and identity shuffles?
    bool isLHSID, isRHSID;
    recognizeIdentityMask(Mask, isLHSID, isRHSID);

    // Eliminate identity shuffles.
    if (isLHSID) return replaceInstUsesWith(SVI, LHS);
    if (isRHSID) return replaceInstUsesWith(SVI, RHS);
  }

  // If only one input is live, try to rebuild LHS's defining expression with
  // its elements already permuted, making the shuffle itself unnecessary.
  if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
    Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
    return replaceInstUsesWith(SVI, V);
  }

  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
  // a non-vector type. We can instead bitcast the original vector followed by
  // an extract of the desired element:
  //
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %1 = bitcast <4 x i8> %sroa to i32
  // Becomes:
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 0
  //
  // If the shuffle is extracting a contiguous range of values from the input
  // vector then each use which is a bitcast of the extracted size can be
  // replaced. This will work if the vector types are compatible, and the begin
  // index is aligned to a value in the casted vector type. If the begin index
  // isn't aligned then we can shuffle the original vector (keeping the same
  // vector type) before extracting.
  //
  // This code will bail out if the target type is fundamentally incompatible
  // with vectors of the source type.
  //
  // Example of <16 x i8>, target type i32:
  // Index range [4,8):         v-----------v Will work.
  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
  //     <4 x i32>: |           |           |           |           |
  //                +-----------+-----------+-----------+-----------+
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
  // Target type i40:           ^--------------^ Won't work, bail.
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
    Value *V = LHS;
    unsigned MaskElems = Mask.size();
    unsigned BegIdx = Mask.front();
    VectorType *SrcTy = cast<VectorType>(V->getType());
    unsigned VecBitWidth = SrcTy->getBitWidth();
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
    unsigned SrcNumElems = SrcTy->getNumElements();
    SmallVector<BitCastInst *, 8> BCs;
    // Cache of created bitcasts, keyed by destination vector type, so that
    // several bitcast users of the same size share one new bitcast.
    DenseMap<Type *, Value *> NewBCs;
    for (User *U : SVI.users())
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
        if (!BC->use_empty())
          // Only visit bitcasts that weren't previously handled.
          BCs.push_back(BC);
    for (BitCastInst *BC : BCs) {
      Type *TgtTy = BC->getDestTy();
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
      if (!TgtElemBitWidth)
        continue;
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
      // Bail on targets whose width doesn't tile the source vector exactly
      // (the "i40" case in the diagram above).
      if (!VecBitWidthsEqual)
        continue;
      if (!VectorType::isValidElementType(TgtTy))
        continue;
      VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
      if (!BegIsAligned) {
        // Shuffle the input so [0,NumElements) contains the output, and
        // [NumElems,SrcNumElems) is undef.
        SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
                                                UndefValue::get(Int32Ty));
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
          ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
        V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
                                         ConstantVector::get(ShuffleMask),
                                         SVI.getName() + ".extract");
        BegIdx = 0;
      }
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
      assert(SrcElemsPerTgtElem);
      // Rescale the begin index from source-element to target-element units.
      BegIdx /= SrcElemsPerTgtElem;
      bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
      auto *NewBC = BCAlreadyExists
                        ? NewBCs[CastSrcTy]
                        : Builder->CreateBitCast(V, CastSrcTy,
                                                 SVI.getName() + ".bc");
      if (!BCAlreadyExists)
        NewBCs[CastSrcTy] = NewBC;
      auto *Ext = Builder->CreateExtractElement(
          NewBC, ConstantInt::get(Int32Ty, BegIdx),
          SVI.getName() + ".extract");
      // The shufflevector isn't being replaced: the bitcast that used it
      // is. InstCombine will visit the newly-created instructions.
      replaceInstUsesWith(*BC, Ext);
      MadeChange = true;
    }
  }

  // If the LHS is a shufflevector itself, see if we can combine it with this
  // one without producing an unusual shuffle.
  // Cases that might be simplified:
  // 1.
  // x1=shuffle(v1,v2,mask1)
  //  x=shuffle(x1,undef,mask)
  //        ==>
  //  x=shuffle(v1,undef,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  // 2.
  // x1=shuffle(v1,undef,mask1)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == mask1.size()
  //        ==>
  //  x=shuffle(v1,x2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  // 3.
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v2.size() == mask2.size()
  //        ==>
  //  x=shuffle(x1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  // 4.
  // x1=shuffle(v1,undef,mask1)
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == v2.size()
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  //
  // Here we are really conservative:
  // we are absolutely afraid of producing a shuffle mask not in the input
  // program, because the code gen may not be smart enough to turn a merged
  // shuffle into two specific shuffles: it may produce worse code. As such,
  // we only merge two shuffles if the result is either a splat or one of the
  // input shuffle masks. In this case, merging the shuffles just removes
  // one instruction, which we know is safe. This is good for things like
  // turning: (splat(splat)) -> splat, or
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
  // Only consider operand shuffles whose own second operand is undef (or, for
  // the LHS, when our RHS is undef) -- these are the four cases shown above.
  if (LHSShuffle)
    if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
      LHSShuffle = nullptr;
  if (RHSShuffle)
    if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
      RHSShuffle = nullptr;
  if (!LHSShuffle && !RHSShuffle)
    return MadeChange ? &SVI : nullptr;

  Value* LHSOp0 = nullptr;
  Value* LHSOp1 = nullptr;
  Value* RHSOp0 = nullptr;
  unsigned LHSOp0Width = 0;
  unsigned RHSOp0Width = 0;
  if (LHSShuffle) {
    LHSOp0 = LHSShuffle->getOperand(0);
    LHSOp1 = LHSShuffle->getOperand(1);
    LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
  }
  if (RHSShuffle) {
    RHSOp0 = RHSShuffle->getOperand(0);
    RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
  }
  Value* newLHS = LHS;
  Value* newRHS = RHS;
  if (LHSShuffle) {
    // case 1
    if (isa<UndefValue>(RHS)) {
      newLHS = LHSOp0;
      newRHS = LHSOp1;
    }
    // case 2 or 4
    else if (LHSOp0Width == LHSWidth) {
      newLHS = LHSOp0;
    }
  }
  // case 3 or 4
  if (RHSShuffle && RHSOp0Width == LHSWidth) {
    newRHS = RHSOp0;
  }
  // case 4
  if (LHSOp0 == RHSOp0) {
    newLHS = LHSOp0;
    newRHS = nullptr;
  }

  if (newLHS == LHS && newRHS == RHS)
    return MadeChange ? &SVI : nullptr;

  SmallVector<int, 16> LHSMask;
  SmallVector<int, 16> RHSMask;
  if (newLHS != LHS)
    LHSMask = LHSShuffle->getShuffleMask();
  if (RHSShuffle && newRHS != RHS)
    RHSMask = RHSShuffle->getShuffleMask();

  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  SmallVector<int, 16> newMask;
  bool isSplat = true;
  int SplatElt = -1;
  // Create a new mask for the new ShuffleVectorInst so that the new
  // ShuffleVectorInst is equivalent to the original one.
  for (unsigned i = 0; i < VWidth; ++i) {
    int eltMask;
    if (Mask[i] < 0) {
      // This element is an undef value.
      eltMask = -1;
    } else if (Mask[i] < (int)LHSWidth) {
      // This element is from left hand side vector operand.
      //
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
      // new mask value for the element.
      if (newLHS != LHS) {
        eltMask = LHSMask[Mask[i]];
        // If the value selected is an undef value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
          eltMask = -1;
      } else
        eltMask = Mask[i];
    } else {
      // This element is from right hand side vector operand
      //
      // If the value selected is an undef value, explicitly specify it
      // with a -1 mask value. (case 1)
      if (isa<UndefValue>(RHS))
        eltMask = -1;
      // If RHS is going to be replaced (case 3 or 4), calculate the
      // new mask value for the element.
      else if (newRHS != RHS) {
        eltMask = RHSMask[Mask[i]-LHSWidth];
        // If the value selected is an undef value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)RHSOp0Width) {
          assert(isa<UndefValue>(RHSShuffle->getOperand(1)) &&
                 "should have been check above");
          eltMask = -1;
        }
      } else
        eltMask = Mask[i]-LHSWidth;

      // If LHS's width is changed, shift the mask value accordingly.
      // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
      // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
      // obfuscation across the two vectors.
      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
        eltMask += newLHSWidth;
    }

    // Check if this could still be a splat.
    if (eltMask >= 0) {
      if (SplatElt >= 0 && SplatElt != eltMask)
        isSplat = false;
      SplatElt = eltMask;
    }

    newMask.push_back(eltMask);
  }

  // If the result mask is equal to one of the original shuffle masks,
  // or is a splat, do the replacement.
  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
      if (newMask[i] < 0) {
        Elts.push_back(UndefValue::get(Int32Ty));
      } else {
        Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
      }
    }
    if (!newRHS)
      newRHS = UndefValue::get(newLHS->getType());
    return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
  }

  // If the result mask is an identity, replace uses of this instruction with
  // corresponding argument.
  bool isLHSID, isRHSID;
  recognizeIdentityMask(newMask, isLHSID, isRHSID);
  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);

  return MadeChange ? &SVI : nullptr;
}
// Translate a masked gather intrinsic like
// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
//                                         <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
// %Mask0 = extractelement <16 x i1> %Mask, i32 0
// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// %Load0 = load i32, i32* %Ptr0, align 4
// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// %Load1 = load i32, i32* %Ptr1, align 4
// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
//
// \p CI is the gather call being lowered (it is erased on success); \p
// ModifiedDT is set to true when new basic blocks are created, i.e. the CFG
// (and hence the dominator tree) has changed.
static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
  Value *Ptrs = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  VectorType *VecType = cast<VectorType>(CI->getType());
  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  // NOTE(review): AlignVal is used unadjusted for each scalar load; the
  // gather's alignment operand presumably applies per element -- confirm
  // against the LangRef for llvm.masked.gather.
  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // The result vector
  Value *VResult = Src0;
  unsigned VectorWidth = VecType->getNumElements();

  // Shorten the way if the mask is a vector of constants: no control flow is
  // needed, just an unconditional load+insert for every enabled lane.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal,
                                                 "Load" + Twine(Idx));
      VResult = Builder.CreateInsertElement(VResult, Load, Idx,
                                            "Res" + Twine(Idx));
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    // No blocks were split, so the dominator tree is untouched.
    return;
  }

  // General case: emit one conditional load block per lane, chained through
  // "else" blocks, with a PHI merging the loaded/previous vector each step.
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = extractelement <16 x i1> %Mask, i32 1
    //  br i1 %Mask1, label %cond.load, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx,
                                                    "Mask" + Twine(Idx));

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
    Builder.SetInsertPoint(InsertPt);

    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal,
                                               "Load" + Twine(Idx));
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx,
                                                    "Res" + Twine(Idx));

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
    Builder.SetInsertPoint(InsertPt);
    // Replace the unconditional fallthrough created by splitBasicBlock with
    // a conditional branch on this lane's mask bit.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Merge the freshly-loaded vector (taken path) with the previous value
    // (skipped path).
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();

  ModifiedDT = true;
}
// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
//                         <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.store, label %else
//
// cond.store:                                       ; preds = %0
//  %3 = extractelement <16 x i32> %val, i32 0
//  %4 = getelementptr i32* %1, i32 0
//  store i32 %3, i32* %4
//  br label %else
//
// else:                                             ; preds = %0, %cond.store
//  %5 = extractelement <16 x i1> %mask, i32 1
//  br i1 %5, label %cond.store1, label %else2
//
// cond.store1:                                      ; preds = %else
//  %6 = extractelement <16 x i32> %val, i32 1
//  %7 = getelementptr i32* %1, i32 1
//  store i32 %6, i32* %7
//  br label %else2
//  . . .
//
// \p CI is the masked.store call being lowered (erased on success); \p
// ModifiedDT is set to true only when new basic blocks are created.
static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = cast<VectorType>(Src->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true: a single full-width vector store
  // (using the intrinsic's original alignment) is equivalent.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction: each element store can only
  // assume the minimum of the vector alignment and the element size.
  AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  // Constant (but not all-ones) mask: emit straight-line stores for the
  // enabled lanes only; no control flow is required.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
    }
    CI->eraseFromParent();
    // No blocks were split, so the dominator tree is untouched.
    return;
  }

  // General case: one conditional store block per lane, chained through
  // "else" blocks (see the IR sketch above).
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %store i32 %OneElt, i32* %EltAddr
    //
    BasicBlock *CondBlock =
        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
    Builder.SetInsertPoint(InsertPt);

    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    Builder.CreateAlignedStore(OneElt, Gep, AlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);
    // Replace the unconditional fallthrough created by splitBasicBlock with
    // a conditional branch on this lane's mask bit.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    IfBlock = NewIfBlock;
  }
  CI->eraseFromParent();

  ModifiedDT = true;
}