/// Return a random value with a known type. Value *getRandomValue(Type *Tp) { unsigned index = Ran->Rand(); for (unsigned i=0; i<PT->size(); ++i) { Value *V = PT->at((index + i) % PT->size()); if (V->getType() == Tp) return V; } // If the requested type was not found, generate a constant value. if (Tp->isIntegerTy()) { if (Ran->Rand() & 1) return ConstantInt::getAllOnesValue(Tp); return ConstantInt::getNullValue(Tp); } else if (Tp->isFloatingPointTy()) { if (Ran->Rand() & 1) return ConstantFP::getAllOnesValue(Tp); return ConstantFP::getNullValue(Tp); } else if (Tp->isVectorTy()) { VectorType *VTp = cast<VectorType>(Tp); std::vector<Constant*> TempValues; TempValues.reserve(VTp->getNumElements()); for (unsigned i = 0; i < VTp->getNumElements(); ++i) TempValues.push_back(getRandomConstant(VTp->getScalarType())); ArrayRef<Constant*> VectorValue(TempValues); return ConstantVector::get(VectorValue); } return UndefValue::get(Tp); }
// Scalarize a vector-to-vector bitcast.  Three cases, by element count:
// equal counts (cast lane-by-lane), fan-out (<M x t1> -> <N*M x t2>), and
// fan-in (<N*M x t1> -> <M x t2>).  Non-vector bitcasts are left untouched.
// Returns true if the instruction was scalarized.
bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!DstVT || !SrcVT)
    return false;

  unsigned DstNumElems = DstVT->getNumElements();
  unsigned SrcNumElems = SrcVT->getNumElements();
  IRBuilder<> Builder(BCI.getParent(), &BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    // Same element count: a straight per-lane bitcast.
    for (unsigned I = 0; I < DstNumElems; ++I)
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
                                     BCI.getName() + ".i" + Twine(I));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
    // individual elements to the destination.
    unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    unsigned ResI = 0;
    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
      Value *V = Op0[Op0I];
      Instruction *VI;
      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
      // Scatter the mid vector and copy its lanes into the result.
      Scatterer Mid = scatter(&BCI, V);
      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
        Res[ResI++] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
    unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    unsigned Op0I = 0;
    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
      // Rebuild a <N x t1> group from the scattered source lanes...
      Value *V = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
                                        BCI.getName() + ".i" + Twine(ResI) +
                                            ".upto" + Twine(MidI));
      // ...then bitcast the whole group to a single destination element.
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
                                        BCI.getName() + ".i" + Twine(ResI));
    }
  }
  gather(&BCI, Res);
  return true;
}
// Return true if V is, or could be, zero.  Undef counts as possibly-zero, and
// a vector counts as possibly-zero if any single element could be zero.
// NOTE(review): the "any lane" semantics suggest this guards something where
// one zero lane is fatal (e.g. a divisor check) — confirm against the caller.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                   AssumptionCache *AC) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    // Scalar: value tracking proves zero when every bit is known zero.
    KnownBits Known =
        computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
    return Known.isZero();
  }

  // Per-component check doesn't work with zeroinitializer
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component
  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    // An undef element could be zero.
    if (isa<UndefValue>(Elem))
      return true;

    KnownBits Known = computeKnownBits(Elem, DL);
    if (Known.isZero())
      return true;
  }

  return false;
}
// Scalarize a GEP that produces a vector of pointers: emit one scalar GEP per
// lane, feeding it lane I of the base and lane I of every index operand.
bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
  if (!VT)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumElems = VT->getNumElements();
  unsigned NumIndices = GEPI.getNumIndices();

  // Scatter the base pointer and every index operand into per-lane values.
  Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
  SmallVector<Scatterer, 8> Ops;
  Ops.resize(NumIndices);
  for (unsigned Idx = 0; Idx != NumIndices; ++Idx)
    Ops[Idx] = scatter(&GEPI, GEPI.getOperand(Idx + 1));

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Lane = 0; Lane != NumElems; ++Lane) {
    // Collect lane `Lane` of each index operand.
    SmallVector<Value *, 8> LaneIndices;
    LaneIndices.resize(NumIndices);
    for (unsigned Idx = 0; Idx != NumIndices; ++Idx)
      LaneIndices[Idx] = Ops[Idx][Lane];

    Res[Lane] =
        Builder.CreateGEP(GEPI.getSourceElementType(), Base[Lane], LaneIndices,
                          GEPI.getName() + ".i" + Twine(Lane));
    // Carry the inbounds flag over to each scalar GEP.
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[Lane]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res);
  return true;
}
// Scalarize a vector select.  The condition may be a vector (one predicate per
// lane) or a scalar (one predicate shared by all lanes).
bool Scalarizer::visitSelectInst(SelectInst &SI) {
  VectorType *VT = dyn_cast<VectorType>(SI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Scatterer TrueVals = scatter(&SI, SI.getOperand(1));
  Scatterer FalseVals = scatter(&SI, SI.getOperand(2));
  assert(TrueVals.size() == NumElems && "Mismatched select");
  assert(FalseVals.size() == NumElems && "Mismatched select");

  ValueVector Res;
  Res.resize(NumElems);
  if (SI.getOperand(0)->getType()->isVectorTy()) {
    // Vector condition: each lane has its own predicate.
    Scatterer Conds = scatter(&SI, SI.getOperand(0));
    assert(Conds.size() == NumElems && "Mismatched select");
    for (unsigned Lane = 0; Lane != NumElems; ++Lane)
      Res[Lane] =
          Builder.CreateSelect(Conds[Lane], TrueVals[Lane], FalseVals[Lane],
                               SI.getName() + ".i" + Twine(Lane));
  } else {
    // Scalar condition shared by every lane.
    Value *Cond = SI.getOperand(0);
    for (unsigned Lane = 0; Lane != NumElems; ++Lane)
      Res[Lane] = Builder.CreateSelect(Cond, TrueVals[Lane], FalseVals[Lane],
                                       SI.getName() + ".i" + Twine(Lane));
  }
  gather(&SI, Res);
  return true;
}
/// getEVT - Return the value type corresponding to the specified type. This /// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types /// are returned as Other, otherwise they are invalid. EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: return MVT::getVT(Ty, HandleUnknown); case Type::IntegerTyID: return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth()); case Type::VectorTyID: { VectorType *VTy = cast<VectorType>(Ty); return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), VTy->getNumElements()); } } }
/// \brief Given a vector and an element number, see if the scalar value is /// already around as a register, for example if it were inserted then extracted /// from the vector. llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); VectorType *VTy = cast<VectorType>(V->getType()); unsigned Width = VTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(VTy->getElementType()); if (Constant *C = dyn_cast<Constant>(V)) return C->getAggregateElement(EltNo); if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. if (!isa<ConstantInt>(III->getOperand(2))) return nullptr; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the // inserted value. if (EltNo == IIElt) return III->getOperand(1); // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return findScalarElement(III->getOperand(0), EltNo); } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) return UndefValue::get(VTy->getElementType()); if (InEl < (int)LHSWidth) return findScalarElement(SVI->getOperand(0), InEl); return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); } // Extract a value from a vector add operation with a constant zero. Value *Val = nullptr; Constant *Con = nullptr; if (match(V, llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val), llvm::PatternMatch::m_Constant(Con)))) { if (Con->getAggregateElement(EltNo)->isNullValue()) return findScalarElement(Val, EltNo); } // Otherwise, we don't know. return nullptr; }
// Extract the contiguous sub-vector [start_pos, start_pos + length) from
// |src| as a new vector value via a shuffle.  Returns |src| unchanged when
// the requested range covers the whole vector.
Value* cg_extension::extract_elements( Value* src, size_t start_pos, size_t length )
{
	VectorType* vty = llvm::cast<VectorType>(src->getType());
	uint32_t elem_count = vty->getNumElements();

	// Whole-vector request: nothing to extract.
	if( start_pos == 0 && length == elem_count ){
		return src;
	}

	// Build a shuffle mask selecting the requested lanes in order.
	vector<int> shuffle_mask;
	shuffle_mask.reserve( length );
	for( size_t lane = 0; lane < length; ++lane ){
		shuffle_mask.push_back( static_cast<int>(lane + start_pos) );
	}

	Value* mask = get_vector<int>( ArrayRef<int>(shuffle_mask) );
	// Second shuffle operand is unused; any lanes drawn from it would be undef.
	return builder_->CreateShuffleVector(
		src, UndefValue::get( src->getType() ), mask );
}
// Scalarize a cast with a vector result by re-emitting the same cast opcode
// once per lane with the scalar destination type.
bool Scalarizer::visitCastInst(CastInst &CI) {
  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(CI.getParent(), &CI);
  Scatterer Src = scatter(&CI, CI.getOperand(0));
  assert(Src.size() == NumElems && "Mismatched cast");

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Lane = 0; Lane != NumElems; ++Lane)
    Res[Lane] =
        Builder.CreateCast(CI.getOpcode(), Src[Lane], VT->getElementType(),
                           CI.getName() + ".i" + Twine(Lane));
  gather(&CI, Res);
  return true;
}
/// FindScalarElement - Given a vector and an element number, see if the scalar /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); if (isa<UndefValue>(V)) return UndefValue::get(PTy->getElementType()); if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(PTy->getElementType()); if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) return CP->getOperand(EltNo); if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. if (!isa<ConstantInt>(III->getOperand(2))) return 0; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the // inserted value. if (EltNo == IIElt) return III->getOperand(1); // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return FindScalarElement(III->getOperand(0), EltNo); } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { unsigned LHSWidth = cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) return UndefValue::get(PTy->getElementType()); if (InEl < (int)LHSWidth) return FindScalarElement(SVI->getOperand(0), InEl); return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } // Otherwise, we don't know. return 0; }
// Scalarize a two-operand vector instruction by applying Split once per lane
// to the scattered operands.
bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
  VectorType *VT = dyn_cast<VectorType>(I.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(I.getParent(), &I);
  Scatterer LHS = scatter(&I, I.getOperand(0));
  Scatterer RHS = scatter(&I, I.getOperand(1));
  assert(LHS.size() == NumElems && "Mismatched binary operation");
  assert(RHS.size() == NumElems && "Mismatched binary operation");

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Lane = 0; Lane != NumElems; ++Lane)
    Res[Lane] = Split(Builder, LHS[Lane], RHS[Lane],
                      I.getName() + ".i" + Twine(Lane));
  gather(&I, Res);
  return true;
}
// Cost of a vector shuffle.  On subtargets with VOP3P, certain swizzles of a
// 2 x 16-bit vector are free; everything else defers to the base
// implementation.
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  if (ST->hasVOP3PInsts()) {
    VectorType *VT = cast<VectorType>(Tp);
    bool IsPacked16 = VT->getNumElements() == 2 &&
                      DL.getTypeSizeInBits(VT->getElementType()) == 16;
    // With op_sel VOP3P instructions freely can access the low half or high
    // half of a register, so any swizzle is free.
    if (IsPacked16 && (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Reverse ||
                       Kind == TTI::SK_PermuteSingleSrc))
      return 0;
  }

  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
// Scalarize a shufflevector: each result lane is picked straight out of the
// scattered operands according to the (constant) mask.
bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  Scatterer LHS = scatter(&SVI, SVI.getOperand(0));
  Scatterer RHS = scatter(&SVI, SVI.getOperand(1));

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Lane = 0; Lane != NumElems; ++Lane) {
    int Selector = SVI.getMaskValue(Lane);
    // A negative selector marks an undef lane; otherwise it indexes the
    // concatenation of the two source vectors.
    if (Selector < 0)
      Res[Lane] = UndefValue::get(VT->getElementType());
    else if (unsigned(Selector) < LHS.size())
      Res[Lane] = LHS[Selector];
    else
      Res[Lane] = RHS[Selector - LHS.size()];
  }
  gather(&SVI, Res);
  return true;
}
// Return true if V is, or could be, zero.  Older KnownZero/KnownOne variant:
// undef counts as possibly-zero, and a vector counts as possibly-zero if any
// single element could be zero.
// NOTE(review): both paths call getIntegerBitWidth(), so V is assumed to be an
// integer or vector-of-integer — confirm callers guarantee this.
static bool isZero(Value *V, const DataLayout *DL) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    // Scalar: the value is zero when every bit is known zero.
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL);
    return KnownZero.isAllOnesValue();
  }

  // Per-component check doesn't work with zeroinitializer
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component
  unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    // An undef element could be zero.
    if (isa<UndefValue>(Elem))
      return true;

    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(Elem, KnownZero, KnownOne, DL);
    if (KnownZero.isAllOnesValue())
      return true;
  }

  return false;
}
/// Return the value type corresponding to the specified type. This returns all /// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned /// as Other, otherwise they are invalid. MVT MVT::getVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: if (HandleUnknown) return MVT(MVT::Other); llvm_unreachable("Unknown type!"); case Type::VoidTyID: return MVT::isVoid; case Type::IntegerTyID: return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth()); case Type::HalfTyID: return MVT(MVT::f16); case Type::FloatTyID: return MVT(MVT::f32); case Type::DoubleTyID: return MVT(MVT::f64); case Type::X86_FP80TyID: return MVT(MVT::f80); case Type::X86_MMXTyID: return MVT(MVT::x86mmx); case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); case Type::PointerTyID: return MVT(MVT::iPTR); case Type::VectorTyID: { VectorType *VTy = cast<VectorType>(Ty); return getVectorVT( getVT(VTy->getElementType(), false), VTy->getNumElements()); } } }
// Scalarize a vector PHI: create one scalar PHI per lane, then populate each
// with the scattered incoming values from every predecessor.
bool Scalarizer::visitPHINode(PHINode &PHI) {
  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  unsigned NumOps = PHI.getNumOperands();
  IRBuilder<> Builder(PHI.getParent(), &PHI);

  // First create the per-lane PHIs...
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Lane = 0; Lane != NumElems; ++Lane)
    Res[Lane] = Builder.CreatePHI(VT->getElementType(), NumOps,
                                  PHI.getName() + ".i" + Twine(Lane));

  // ...then wire in lane `Lane` of each incoming value.
  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
    Scatterer Incoming = scatter(&PHI, PHI.getIncomingValue(OpIdx));
    BasicBlock *Pred = PHI.getIncomingBlock(OpIdx);
    for (unsigned Lane = 0; Lane != NumElems; ++Lane)
      cast<PHINode>(Res[Lane])->addIncoming(Incoming[Lane], Pred);
  }
  gather(&PHI, Res);
  return true;
}
// Pick a random value and append a random cast of it to another type onto the
// instruction pool (PT), inserting before the block terminator.  Vector
// sources are cast to vector destinations with the same element count.
virtual void Act() {
  Value *V = getRandomVal();
  Type *VTy = V->getType();
  Type *DestTy = pickScalarType();

  // Vectors are cast to vectors: pick a destination vector type with a
  // matching number of elements.
  if (VTy->isVectorTy()) {
    VectorType *VecTy = cast<VectorType>(VTy);
    DestTy = pickVectorType(VecTy->getNumElements());
  }

  // No need to cast between identical types.
  if (VTy == DestTy)
    return;

  // Pointers: only pointer-to-pointer bitcasts are generated.
  if (VTy->isPointerTy()) {
    if (!DestTy->isPointerTy())
      DestTy = PointerType::get(DestTy, 0);
    return PT->push_back(
        new BitCastInst(V, DestTy, "PC", BB->getTerminator()));
  }

  // Generate lots of bitcasts (legal only when bit widths match exactly).
  if ((Ran->Rand() & 1) &&
      VTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
    return PT->push_back(
        new BitCastInst(V, DestTy, "BC", BB->getTerminator()));
  }

  // Both types are integers: truncate or extend depending on relative widths.
  if (VTy->getScalarType()->isIntegerTy() &&
      DestTy->getScalarType()->isIntegerTy()) {
    if (VTy->getScalarType()->getPrimitiveSizeInBits() >
        DestTy->getScalarType()->getPrimitiveSizeInBits()) {
      return PT->push_back(
          new TruncInst(V, DestTy, "Tr", BB->getTerminator()));
    } else {
      // Widening: choose zero- or sign-extension at random.
      if (Ran->Rand() & 1)
        return PT->push_back(
            new ZExtInst(V, DestTy, "ZE", BB->getTerminator()));
      return PT->push_back(new SExtInst(V, DestTy, "Se", BB->getTerminator()));
    }
  }

  // Fp to int.
  if (VTy->getScalarType()->isFloatingPointTy() &&
      DestTy->getScalarType()->isIntegerTy()) {
    if (Ran->Rand() & 1)
      return PT->push_back(
          new FPToSIInst(V, DestTy, "FC", BB->getTerminator()));
    return PT->push_back(new FPToUIInst(V, DestTy, "FC", BB->getTerminator()));
  }

  // Int to fp.
  if (VTy->getScalarType()->isIntegerTy() &&
      DestTy->getScalarType()->isFloatingPointTy()) {
    if (Ran->Rand() & 1)
      return PT->push_back(
          new SIToFPInst(V, DestTy, "FC", BB->getTerminator()));
    return PT->push_back(new UIToFPInst(V, DestTy, "FC", BB->getTerminator()));
  }

  // Both floats: truncate or extend depending on relative widths.
  if (VTy->getScalarType()->isFloatingPointTy() &&
      DestTy->getScalarType()->isFloatingPointTy()) {
    if (VTy->getScalarType()->getPrimitiveSizeInBits() >
        DestTy->getScalarType()->getPrimitiveSizeInBits()) {
      return PT->push_back(
          new FPTruncInst(V, DestTy, "Tr", BB->getTerminator()));
    } else {
      return PT->push_back(
          new FPExtInst(V, DestTy, "ZE", BB->getTerminator()));
    }
  }
}
/// WriteTypeTable - Write out the type table for a module.
/// Emits one record per enumerated type into a TYPE_BLOCK_ID_NEW subblock,
/// preceded by an entry count.  Aggregate (struct/array) types are rejected:
/// they are not part of the PNaCl stable bitcode ABI.
static void WriteTypeTable(const NaClValueEnumerator &VE,
                           NaClBitstreamWriter &Stream) {
  DEBUG(dbgs() << "-> WriteTypeTable\n");
  const NaClValueEnumerator::TypeList &TypeList = VE.getTypes();

  Stream.EnterSubblock(naclbitc::TYPE_BLOCK_ID_NEW, TYPE_MAX_ABBREV);

  SmallVector<uint64_t, 64> TypeVals;

  // Abbrev for TYPE_CODE_FUNCTION.
  NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
  Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION));
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // isvararg
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
  // The index EmitAbbrev assigns must agree with the TYPE_FUNCTION_ABBREV
  // constant used when emitting function-type records below.
  if (TYPE_FUNCTION_ABBREV != Stream.EmitAbbrev(Abbv))
    llvm_unreachable("Unexpected abbrev ordering!");

  // Emit an entry count so the reader can reserve space.
  TypeVals.push_back(TypeList.size());
  Stream.EmitRecord(naclbitc::TYPE_CODE_NUMENTRY, TypeVals);
  TypeVals.clear();

  // Loop over all of the types, emitting each in turn.
  for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
    Type *T = TypeList[i];
    int AbbrevToUse = 0;
    unsigned Code = 0;

    switch (T->getTypeID()) {
    default:
      llvm_unreachable("Unknown type!");
    case Type::VoidTyID:
      Code = naclbitc::TYPE_CODE_VOID;
      break;
    case Type::FloatTyID:
      Code = naclbitc::TYPE_CODE_FLOAT;
      break;
    case Type::DoubleTyID:
      Code = naclbitc::TYPE_CODE_DOUBLE;
      break;
    case Type::IntegerTyID:
      // INTEGER: [width]
      Code = naclbitc::TYPE_CODE_INTEGER;
      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
      break;
    case Type::VectorTyID: {
      VectorType *VT = cast<VectorType>(T);
      // VECTOR [numelts, eltty]
      Code = naclbitc::TYPE_CODE_VECTOR;
      TypeVals.push_back(VT->getNumElements());
      TypeVals.push_back(VE.getTypeID(VT->getElementType()));
      break;
    }
    case Type::FunctionTyID: {
      FunctionType *FT = cast<FunctionType>(T);
      // FUNCTION: [isvararg, retty, paramty x N]
      Code = naclbitc::TYPE_CODE_FUNCTION;
      TypeVals.push_back(FT->isVarArg());
      TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
      for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
        TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
      AbbrevToUse = TYPE_FUNCTION_ABBREV;
      break;
    }
    // Aggregates are deliberately excluded from the stable ABI.
    case Type::StructTyID:
      report_fatal_error("Struct types are not supported in PNaCl bitcode");
    case Type::ArrayTyID:
      report_fatal_error("Array types are not supported in PNaCl bitcode");
    }

    // Emit the finished record.
    Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
    TypeVals.clear();
  }
  Stream.ExitBlock();
  DEBUG(dbgs() << "<- WriteTypeTable\n");
}
// Lower AMDGPU kernel arguments: replace each used argument with a load from
// the kernarg segment pointer, annotated with the metadata implied by the
// argument's attributes.  Returns true if the function was modified.
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  CallingConv::ID CC = F.getCallingConv();
  // Only AMDGPU kernels with at least one argument are lowered.
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();

  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  BasicBlock &EntryBlock = *F.begin();
  // All replacement loads are inserted at the top of the entry block.
  IRBuilder<> Builder(&*EntryBlock.begin());

  const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);

  // MaxAlign is an out-parameter filled in by getKernArgSegmentSize —
  // TODO confirm; it is read again at the bottom of the function.
  unsigned MaxAlign;
  // FIXME: Alignment is broken with explicit arg offset.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");

  KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
  KernArgSegment->addAttribute(
      AttributeList::ReturnIndex,
      Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

  unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
  uint64_t ExplicitArgOffset = 0;

  for (Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned Align = DL.getABITypeAlignment(ArgTy);
    unsigned Size = DL.getTypeSizeInBits(ArgTy);
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

    // EltOffset is this argument's byte offset within the kernarg segment;
    // ExplicitArgOffset advances past it for the next argument.
    uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;

    if (Arg.use_empty())
      continue;

    if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }

    VectorType *VT = dyn_cast<VectorType>(ArgTy);
    bool IsV3 = VT && VT->getNumElements() == 3;
    bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    VectorType *V4Ty = nullptr;

    int64_t AlignDownOffset = alignDown(EltOffset, 4);
    int64_t OffsetDiff = EltOffset - AlignDownOffset;
    unsigned AdjustedAlign =
        MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset, KernArgBaseAlign);

    Value *ArgPtr;
    Type *AdjustedArgTy;
    if (DoShiftOpt) { // FIXME: Handle aggregate types
      // Since we don't have sub-dword scalar loads, avoid doing an extload by
      // loading earlier than the argument address, and extracting the relevant
      // bits.
      //
      // Additionally widen any sub-dword load to i32 even if suitably aligned,
      // so that CSE between different argument loads works easily.
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          Arg.getName() + ".kernarg.offset.align.down");
      AdjustedArgTy = Builder.getInt32Ty();
    } else {
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".kernarg.offset");
      AdjustedArgTy = ArgTy;
    }

    if (IsV3 && Size >= 32) {
      V4Ty = VectorType::get(VT->getVectorElementType(), 4);
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
      AdjustedArgTy = V4Ty;
    }

    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
                                   ArgPtr->getName() + ".cast");
    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    // Kernel arguments are immutable for the lifetime of the dispatch.
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);

    if (isa<PointerType>(ArgTy)) {
      // Transfer pointer-argument attributes onto the load as metadata.
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0) {
        Load->setMetadata(
            LLVMContext::MD_dereferenceable,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(
                            Builder.getInt64Ty(), DerefBytes))));
      }

      uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0) {
        Load->setMetadata(
            LLVMContext::MD_dereferenceable_or_null,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(
                            Builder.getInt64Ty(), DerefOrNullBytes))));
      }

      unsigned ParamAlign = Arg.getParamAlignment();
      if (ParamAlign != 0) {
        Load->setMetadata(
            LLVMContext::MD_align,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(
                            Builder.getInt64Ty(), ParamAlign))));
      }
    }

    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      // Shift the wider i32 load right so the argument's bytes land at bit 0,
      // then truncate and bitcast back to the original argument type.
      Value *ExtractBits = OffsetDiff == 0 ?
        Load : Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal =
          Builder.CreateBitCast(Trunc, ArgTy, Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
    } else if (IsV3) {
      // Shuffle the padded v4 load back down to the original 3 elements.
      Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
                                                {0, 1, 2},
                                                Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
    } else {
      Load->setName(Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Load);
    }
  }

  KernArgSegment->addAttribute(
      AttributeList::ReturnIndex,
      Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  return true;
}
// Serialize the constant CV into the little-endian byte buffer (LE),
// matching the in-memory layout given by DataLayout, including struct field
// padding and vector tail padding.  Var names the variable being emitted
// (threaded through recursive calls).
void StoreInitializer::append(const Constant *CV, StringRef Var) {
  switch (CV->getValueID()) {
  case Value::ConstantArrayVal: { // Recursive type.
    const ConstantArray *CA = cast<ConstantArray>(CV);
    for (unsigned I = 0, E = CA->getNumOperands(); I < E; ++I)
      append(cast<Constant>(CA->getOperand(I)), Var);
    break;
  }
  case Value::ConstantDataArrayVal: {
    const ConstantDataArray *CVE = cast<ConstantDataArray>(CV);
    for (unsigned I = 0, E = CVE->getNumElements(); I < E; ++I)
      append(cast<Constant>(CVE->getElementAsConstant(I)), Var);
    break;
  }
  case Value::ConstantStructVal: { // Recursive type.
    const ConstantStruct *S = cast<ConstantStruct>(CV);
    StructType *ST = S->getType();
    const StructLayout *SL = DL.getStructLayout(ST);
    uint64_t StructSize = DL.getTypeAllocSize(ST);
    uint64_t BaseOffset = SL->getElementOffset(0);
    for (unsigned I = 0, E = S->getNumOperands(); I < E; ++I) {
      Constant *Elt = cast<Constant>(S->getOperand(I));
      append(Elt, Var);
      uint64_t EltSize = DL.getTypeAllocSize(Elt->getType());
      uint64_t EltOffset = SL->getElementOffset(I);
      // The padded size of a field runs to the next field's offset, or to the
      // end of the struct for the last field.
      uint64_t PaddedEltSize;
      if (I == E - 1)
        PaddedEltSize = BaseOffset + StructSize - EltOffset;
      else
        PaddedEltSize = SL->getElementOffset(I + 1) - EltOffset;
      // Match structure layout by padding with zeroes.
      while (EltSize < PaddedEltSize) {
        LE.write(static_cast<uint8_t>(0));
        ++EltSize;
      }
    }
    break;
  }
  case Value::ConstantVectorVal: { // Almost leaf type.
    const ConstantVector *CVE = cast<ConstantVector>(CV);
    VectorType *Ty = CVE->getType();
    Type *EltTy = Ty->getElementType();
    unsigned NElts = Ty->getNumElements();
    // RealNElts accounts for vectors whose alloc size exceeds their declared
    // element count (e.g. <3 x T> stored as 4 lanes).
    unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);
    unsigned I;
    for (I = 0; I < NElts; ++I)
      append(cast<Constant>(CVE->getOperand(I)), Var);
    // Zero-fill the padding lanes.
    Constant *Zero = Constant::getNullValue(EltTy);
    while (I < RealNElts) {
      append(Zero, Var);
      ++I;
    }
    break;
  }
  case Value::ConstantDataVectorVal: {
    const ConstantDataVector *CVE = cast<ConstantDataVector>(CV);
    VectorType *Ty = CVE->getType();
    Type *EltTy = Ty->getElementType();
    unsigned NElts = Ty->getNumElements();
    // Same tail-padding handling as the ConstantVector case above.
    unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);
    unsigned I;
    for (I = 0; I < NElts; ++I)
      append(cast<Constant>(CVE->getElementAsConstant(I)), Var);
    Constant *Zero = Constant::getNullValue(EltTy);
    while (I < RealNElts) {
      append(Zero, Var);
      ++I;
    }
    break;
  }
  case Value::ConstantIntVal: {
    const ConstantInt *CI = cast<ConstantInt>(CV);
    // i1 is stored as a single byte.
    if (CI->getType()->isIntegerTy(1)) {
      LE.write(static_cast<uint8_t>(CI->getZExtValue() ? 1 : 0));
    } else {
      switch (CI->getBitWidth()) {
      case 8:
        LE.write(static_cast<uint8_t>(CI->getZExtValue()));
        break;
      case 16:
        LE.write(static_cast<uint16_t>(CI->getZExtValue()));
        break;
      case 32:
        LE.write(static_cast<uint32_t>(CI->getZExtValue()));
        break;
      case 64:
        LE.write(static_cast<uint64_t>(CI->getZExtValue()));
        break;
      }
    }
    break;
  }
  case Value::ConstantFPVal: {
    const ConstantFP *CFP = cast<ConstantFP>(CV);
    if (CFP->getType()->isFloatTy())
      LE.write(CFP->getValueAPF().convertToFloat());
    else if (CFP->getType()->isDoubleTy())
      LE.write(CFP->getValueAPF().convertToDouble());
    else
      llvm_unreachable("unhandled ConstantFP type");
    break;
  }
  case Value::ConstantPointerNullVal: {
    // Null pointer: emit a zero of the pointer width for its address space.
    unsigned AS = CV->getType()->getPointerAddressSpace();
    if (DL.getPointerSize(AS) == 8)
      LE.write(static_cast<uint64_t>(0));
    else
      LE.write(static_cast<uint32_t>(0));
    break;
  }
  case Value::UndefValueVal:
  case Value::ConstantAggregateZeroVal: {
    // Undef and zeroinitializer both emit as zero bytes, written in units of
    // InitEltSize.
    uint64_t Size = DL.getTypeAllocSize(CV->getType());
    for (uint64_t I = 0; I < Size / InitEltSize; ++I) {
      switch (InitEltSize) {
      case 1:
        LE.write(static_cast<uint8_t>(0));
        break;
      case 2:
        LE.write(static_cast<uint16_t>(0));
        break;
      case 4:
        LE.write(static_cast<uint32_t>(0));
        break;
      case 8:
        LE.write(static_cast<uint64_t>(0));
        break;
      default:
        llvm_unreachable("unhandled size");
      }
    }
    break;
  }
  case Value::GlobalVariableVal:
  case Value::ConstantExprVal: {
    const MCExpr *Expr = AP.lowerConstant(CV);
    // Offset that address needs to be written at is the current size of the
    // buffer.
    uint64_t CurrOffset = dataSizeInBytes();
    // Emit zero placeholder bytes; the (offset, expr) pair is recorded in
    // VarInitAddresses, presumably so the real address is patched in later —
    // confirm against the consumer of VarInitAddresses.
    unsigned Size = DL.getTypeAllocSize(CV->getType());
    switch (Size) {
    case 4:
      LE.write(static_cast<uint32_t>(0));
      break;
    case 8:
      LE.write(static_cast<uint64_t>(0));
      break;
    default:
      llvm_unreachable("unhandled size");
    }
    VarInitAddresses.emplace_back(CurrOffset, Expr);
    break;
  }
  default:
    llvm_unreachable("unhandled initializer");
  }
}
// Estimate the cost delta of vectorizing the tree rooted at the bundle VL
// (negative means vectorization is profitable).  Returns max_cost when the
// bundle cannot be vectorized; Depth bounds the recursion.
int BoUpSLP::getTreeRollCost(ValueList &VL, unsigned Depth) {
  // Bound the recursion depth.
  if (Depth == 6)
    return max_cost;
  Type *ScalarTy = VL[0]->getType();

  // For stores, cost is modeled on the stored value's type.
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  /// Don't mess with vectors.
  if (ScalarTy->isVectorTy())
    return max_cost;

  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());

  // Check if all of the operands are constants or the same scalar.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // Must have a single use.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Need to scalarize instructions with multiple users or from other BBs.
    if (I && ((I->getNumUses() > 1) || (I->getParent() != BB)))
      return getScalarizationCost(VecTy);
  }

  // Is this a simple vector constant.
  if (AllConst)
    return 0;

  // If all of the operands are identical we can broadcast them.
  if (AllSameScalar)
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0)
    return getScalarizationCost(VecTy);

  assert(VL0->getParent() == BB && "Wrong BB");

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode())
      return getScalarizationCost(VecTy);
  }

  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    int MaxIdx = InstrIdx[VL0];
    for (unsigned i = 1, e = VL.size(); i < e; ++i)
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);

    // All accesses must be sinkable down to the last one in program order.
    Instruction *Last = InstrVec[MaxIdx];
    for (unsigned i = 0, e = VL.size(); i < e; ++i) {
      if (VL[i] == Last)
        continue;
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
      if (Barrier) {
        DEBUG(dbgs() << "LR: Can't sink " << *VL[i] << "\n down to " << *Last
                     << "\n because of " << *Barrier << "\n");
        return max_cost;
      }
    }
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList Operands;
    int Cost = 0;
    // Calculate the cost of all of the operands.
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
      // Prepare the operand vector.
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

      Cost += getTreeRollCost(Operands, Depth + 1);
      Operands.clear();
    }

    // Calculate the cost of this instruction.
    int ScalarCost = VecTy->getNumElements() *
                     TTI->getArithmeticInstrCost(Opcode, ScalarTy);
    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
    Cost += (VecCost - ScalarCost);
    return Cost;
  }
  case Instruction::Load: {
    // If we are scalarize the loads, add the cost of forming the vector.
    for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
      if (!isConsecutiveAccess(VL[i], VL[i + 1]))
        return getScalarizationCost(VecTy);

    // Cost of wide load - cost of scalar loads.
    int ScalarLdCost = VecTy->getNumElements() *
                       TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
    // BUGFIX: the wide-load cost must be queried on the vector type, not the
    // scalar type, otherwise this compares a scalar load against scalar loads.
    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
    return VecLdCost - ScalarLdCost;
  }
  case Instruction::Store: {
    // We know that we can merge the stores. Calculate the cost.
    int ScalarStCost = VecTy->getNumElements() *
                       TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
    // BUGFIX: likewise, the wide-store cost uses the vector type.
    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
    int StoreCost = VecStCost - ScalarStCost;

    // Add the cost of building the stored-value bundle; loads between the
    // stores being merged are ignored as memory barriers.
    ValueList Operands;
    for (unsigned j = 0; j < VL.size(); ++j) {
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      MemBarrierIgnoreList.insert(VL[j]);
    }

    int TotalCost = StoreCost + getTreeRollCost(Operands, Depth + 1);
    MemBarrierIgnoreList.clear();
    return TotalCost;
  }
  default:
    // Unable to vectorize unknown instructions.
    return getScalarizationCost(VecTy);
  }
}
// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
//                               <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.store, label %else
//
// cond.store:                                       ; preds = %0
//  %3 = extractelement <16 x i32> %val, i32 0
//  %4 = getelementptr i32* %1, i32 0
//  store i32 %3, i32* %4
//  br label %else
//
// else:                                             ; preds = %0, %cond.store
//  %5 = extractelement <16 x i1> %mask, i32 1
//  br i1 %5, label %cond.store1, label %else2
//
// cond.store1:                                      ; preds = %else
//  %6 = extractelement <16 x i32> %val, i32 1
//  %7 = getelementptr i32* %1, i32 1
//  store i32 %6, i32* %7
//  br label %else2
//   . . .
static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
  // Intrinsic operands: value to store, destination pointer, constant
  // alignment, and the per-element i1 mask.
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = cast<VectorType>(Src->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true: a plain vector store is equivalent
  // and no control flow is needed.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  // If the mask is a compile-time constant vector, emit straight-line stores
  // for exactly the enabled lanes; no branches, no CFG change.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // General case: build one conditional store per lane by repeatedly
  // splitting the current block.  Note the statement order below is
  // significant: split, emit the guarded store, split again, then rewrite
  // the original terminator into the conditional branch.
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);

    // Create "cond" block
    //
    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %store i32 %OneElt, i32* %EltAddr
    //
    BasicBlock *CondBlock =
        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
    Builder.SetInsertPoint(InsertPt);

    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    Builder.CreateAlignedStore(OneElt, Gep, AlignVal);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);
    // Replace the unconditional branch created by splitBasicBlock with the
    // mask-guarded conditional branch.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    IfBlock = NewIfBlock;
  }
  CI->eraseFromParent();
  // The CFG was rewritten, so the dominator tree must be recomputed.
  ModifiedDT = true;
}
// Translate a masked gather intrinsic like
// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
//                               <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
// %Mask0 = extractelement <16 x i1> %Mask, i32 0
// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// %Load0 = load i32, i32* %Ptr0, align 4
// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// %Load1 = load i32, i32* %Ptr1, align 4
// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
  // Intrinsic operands: vector of pointers, constant alignment, i1 mask,
  // and the passthru vector supplying values for masked-off lanes.
  Value *Ptrs = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  VectorType *VecType = cast<VectorType>(CI->getType());
  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();

  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // The result vector: starts as the passthru and accumulates one loaded
  // element per enabled lane.
  Value *VResult = Src0;
  unsigned VectorWidth = VecType->getNumElements();

  // Shorten the way if the mask is a vector of constants: emit straight-line
  // loads/inserts for the enabled lanes with no control flow.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
      LoadInst *Load =
          Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
      VResult = Builder.CreateInsertElement(VResult, Load, Idx,
                                            "Res" + Twine(Idx));
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // General case: one conditional load per lane, with a PHI merging the
  // "loaded" and "skipped" values after each lane's diamond.
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %Mask1 = extractelement <16 x i1> %Mask, i32 1
    //  br i1 %Mask1, label %cond.load, label %else
    //
    Value *Predicate =
        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
    Builder.SetInsertPoint(InsertPt);

    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
    LoadInst *Load =
        Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx,
                                                    "Res" + Twine(Idx));

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
    Builder.SetInsertPoint(InsertPt);
    // Replace the unconditional branch left by splitBasicBlock with the
    // mask-guarded conditional branch.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Merge the new value (lane loaded) with the previous value (lane
    // masked off) so subsequent lanes build on the right vector.
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();
  // The CFG was rewritten, so the dominator tree must be recomputed.
  ModifiedDT = true;
}
// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
//                               <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
//  %1 = bitcast i8* %addr to i32*
//  %2 = extractelement <16 x i1> %mask, i32 0
//  br i1 %2, label %cond.load, label %else
//
// cond.load:                                        ; preds = %0
//  %3 = getelementptr i32* %1, i32 0
//  %4 = load i32* %3
//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
//  br label %else
//
// else:                                             ; preds = %0, %cond.load
//  %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
//  %6 = extractelement <16 x i1> %mask, i32 1
//  br i1 %6, label %cond.load1, label %else2
//
// cond.load1:                                       ; preds = %else
//  %7 = getelementptr i32* %1, i32 1
//  %8 = load i32* %7
//  %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
//  br label %else2
//
// else2:                                          ; preds = %else, %cond.load1
//  %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
//  %10 = extractelement <16 x i1> %mask, i32 2
//  br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
  // Intrinsic operands: source pointer, constant alignment, i1 mask, and
  // the passthru vector supplying values for masked-off lanes.
  Value *Ptr = CI->getArgOperand(0);
  Value *Alignment = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *Src0 = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = cast<VectorType>(CI->getType());

  Type *EltTy = VecType->getElementType();

  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();

  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true: a plain vector load is equivalent
  // and no control flow is needed.
  if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
    Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
    CI->replaceAllUsesWith(NewI);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instruction.
  AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
  // Bitcast %addr from i8* to EltTy*
  Type *NewPtrType =
      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  // The result vector: starts as the passthru and accumulates one loaded
  // element per enabled lane.
  Value *VResult = Src0;

  // If the mask is a compile-time constant vector, emit straight-line
  // loads/inserts for exactly the enabled lanes; no branches, no CFG change.
  if (isConstantIntVector(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
        continue;
      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
    }
    CI->replaceAllUsesWith(VResult);
    CI->eraseFromParent();
    return;
  }

  // General case: one conditional load per lane, with a PHI merging the
  // "loaded" and "skipped" values after each lane's diamond.
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    //  br i1 %mask_1, label %cond.load, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);

    // Create "cond" block
    //
    //  %EltAddr = getelementptr i32* %1, i32 0
    //  %Elt = load i32* %EltAddr
    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
    //
    BasicBlock *CondBlock =
        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
    Builder.SetInsertPoint(InsertPt);

    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);

    // Create "else" block, fill it in the next iteration
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);
    // Replace the unconditional branch left by splitBasicBlock with the
    // mask-guarded conditional branch.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
    OldBr->eraseFromParent();
    BasicBlock *PrevIfBlock = IfBlock;
    IfBlock = NewIfBlock;

    // Create the phi to join the new and previous value.
    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
    Phi->addIncoming(NewVResult, CondBlock);
    Phi->addIncoming(VResult, PrevIfBlock);
    VResult = Phi;
  }

  CI->replaceAllUsesWith(VResult);
  CI->eraseFromParent();
  // The CFG was rewritten, so the dominator tree must be recomputed.
  ModifiedDT = true;
}
/// Simplify and canonicalize a shufflevector instruction.  Folds performed,
/// in order: undef-mask elimination, demanded-elements simplification,
/// canonicalization to "shuffle(x, undef, mask)", identity-shuffle removal,
/// evaluating the shuffled operand in a different element order, the
/// SROA shuffle+bitcast pattern, and merging with operand shuffles.
/// Returns a replacement instruction, the (modified) instruction itself,
/// or nullptr when nothing changed.
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  Value *LHS = SVI.getOperand(0);
  Value *RHS = SVI.getOperand(1);
  SmallVector<int, 16> Mask = SVI.getShuffleMask();
  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());

  bool MadeChange = false;

  // Undefined shuffle mask -> undefined value.
  if (isa<UndefValue>(SVI.getOperand(2)))
    return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));

  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();

  APInt UndefElts(VWidth, 0);
  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
    if (V != &SVI)
      return replaceInstUsesWith(SVI, V);
    // The instruction was simplified in place; re-read the operands since
    // SimplifyDemandedVectorElts may have replaced them.
    LHS = SVI.getOperand(0);
    RHS = SVI.getOperand(1);
    MadeChange = true;
  }

  unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();

  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
  if (LHS == RHS || isa<UndefValue>(LHS)) {
    if (isa<UndefValue>(LHS) && LHS == RHS) {
      // shuffle(undef,undef,mask) -> undef.
      Value *Result = (VWidth == LHSWidth) ? LHS :
                      UndefValue::get(SVI.getType());
      return replaceInstUsesWith(SVI, Result);
    }

    // Remap any references to RHS to use LHS.
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
      if (Mask[i] < 0) {
        Elts.push_back(UndefValue::get(Int32Ty));
        continue;
      }

      // A lane that selects from an undef operand becomes undef.
      if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
          (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
        Mask[i] = -1;     // Turn into undef.
        Elts.push_back(UndefValue::get(Int32Ty));
      } else {
        Mask[i] = Mask[i] % e;  // Force to LHS.
        Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
      }
    }
    // Note: when LHS is undef (but != RHS), operand 1 holds the live value,
    // so move it into operand 0 and drop the old RHS.
    SVI.setOperand(0, SVI.getOperand(1));
    SVI.setOperand(1, UndefValue::get(RHS->getType()));
    SVI.setOperand(2, ConstantVector::get(Elts));
    LHS = SVI.getOperand(0);
    RHS = SVI.getOperand(1);
    MadeChange = true;
  }

  if (VWidth == LHSWidth) {
    // Analyze the shuffle, are the LHS or RHS and identity shuffles?
    bool isLHSID, isRHSID;
    recognizeIdentityMask(Mask, isLHSID, isRHSID);

    // Eliminate identity shuffles.
    if (isLHSID) return replaceInstUsesWith(SVI, LHS);
    if (isRHSID) return replaceInstUsesWith(SVI, RHS);
  }

  // If the source expression can itself be computed in the shuffled element
  // order, rebuild it that way and drop the shuffle entirely.
  if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
    Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
    return replaceInstUsesWith(SVI, V);
  }

  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
  // a non-vector type. We can instead bitcast the original vector followed by
  // an extract of the desired element:
  //
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %1 = bitcast <4 x i8> %sroa to i32
  // Becomes:
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 0
  //
  // If the shuffle is extracting a contiguous range of values from the input
  // vector then each use which is a bitcast of the extracted size can be
  // replaced. This will work if the vector types are compatible, and the begin
  // index is aligned to a value in the casted vector type. If the begin index
  // isn't aligned then we can shuffle the original vector (keeping the same
  // vector type) before extracting.
  //
  // This code will bail out if the target type is fundamentally incompatible
  // with vectors of the source type.
  //
  // Example of <16 x i8>, target type i32:
  // Index range [4,8):         v-----------v Will work.
  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
  //     <4 x i32>: |           |           |           |           |
  //                +-----------+-----------+-----------+-----------+
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
  // Target type i40:           ^--------------------------------^ Won't work, bail.
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
    Value *V = LHS;
    unsigned MaskElems = Mask.size();
    unsigned BegIdx = Mask.front();
    VectorType *SrcTy = cast<VectorType>(V->getType());
    unsigned VecBitWidth = SrcTy->getBitWidth();
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
    unsigned SrcNumElems = SrcTy->getNumElements();
    // Collect the bitcast users up front; replaceInstUsesWith below mutates
    // the use list, so we must not iterate SVI.users() while replacing.
    SmallVector<BitCastInst *, 8> BCs;
    DenseMap<Type *, Value *> NewBCs;
    for (User *U : SVI.users())
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
        if (!BC->use_empty())
          // Only visit bitcasts that weren't previously handled.
          BCs.push_back(BC);
    for (BitCastInst *BC : BCs) {
      Type *TgtTy = BC->getDestTy();
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
      if (!TgtElemBitWidth)
        continue;
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
      if (!VecBitWidthsEqual)
        continue;
      if (!VectorType::isValidElementType(TgtTy))
        continue;
      VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
      if (!BegIsAligned) {
        // Shuffle the input so [0,NumElements) contains the output, and
        // [NumElems,SrcNumElems) is undef.
        SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
                                                UndefValue::get(Int32Ty));
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
          ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
        V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
                                         ConstantVector::get(ShuffleMask),
                                         SVI.getName() + ".extract");
        BegIdx = 0;
      }
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
      assert(SrcElemsPerTgtElem);
      BegIdx /= SrcElemsPerTgtElem;
      // Reuse an earlier bitcast to the same type if we already made one.
      bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
      auto *NewBC = BCAlreadyExists ?
          NewBCs[CastSrcTy] :
          Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
      if (!BCAlreadyExists)
        NewBCs[CastSrcTy] = NewBC;
      auto *Ext = Builder->CreateExtractElement(
          NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
      // The shufflevector isn't being replaced: the bitcast that used it
      // is. InstCombine will visit the newly-created instructions.
      replaceInstUsesWith(*BC, Ext);
      MadeChange = true;
    }
  }

  // If the LHS is a shufflevector itself, see if we can combine it with this
  // one without producing an unusual shuffle.
  // Cases that might be simplified:
  // 1.
  // x1=shuffle(v1,v2,mask1)
  //  x=shuffle(x1,undef,mask)
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  // 2.
  // x1=shuffle(v1,undef,mask1)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == mask1.size()
  //        ==>
  //  x=shuffle(v1,x2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  // 3.
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v2.size() == mask2.size()
  //        ==>
  //  x=shuffle(x1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  // 4.
  // x1=shuffle(v1,undef,mask1)
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  // where v1.size() == v2.size()
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  //
  // Here we are really conservative:
  // we are absolutely afraid of producing a shuffle mask not in the input
  // program, because the code gen may not be smart enough to turn a merged
  // shuffle into two specific shuffles: it may produce worse code.  As such,
  // we only merge two shuffles if the result is either a splat or one of the
  // input shuffle masks.  In this case, merging the shuffles just removes
  // one instruction, which we know is safe.  This is good for things like
  // turning: (splat(splat)) -> splat, or
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
  // Only consider operand shuffles whose second operand is undef (except
  // case 1, where the outer RHS is undef); anything else is too complex.
  if (LHSShuffle)
    if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
      LHSShuffle = nullptr;
  if (RHSShuffle)
    if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
      RHSShuffle = nullptr;
  if (!LHSShuffle && !RHSShuffle)
    return MadeChange ? &SVI : nullptr;

  Value* LHSOp0 = nullptr;
  Value* LHSOp1 = nullptr;
  Value* RHSOp0 = nullptr;
  unsigned LHSOp0Width = 0;
  unsigned RHSOp0Width = 0;
  if (LHSShuffle) {
    LHSOp0 = LHSShuffle->getOperand(0);
    LHSOp1 = LHSShuffle->getOperand(1);
    LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
  }
  if (RHSShuffle) {
    RHSOp0 = RHSShuffle->getOperand(0);
    RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
  }
  Value* newLHS = LHS;
  Value* newRHS = RHS;
  if (LHSShuffle) {
    // case 1
    if (isa<UndefValue>(RHS)) {
      newLHS = LHSOp0;
      newRHS = LHSOp1;
    }
    // case 2 or 4
    else if (LHSOp0Width == LHSWidth) {
      newLHS = LHSOp0;
    }
  }
  // case 3 or 4
  if (RHSShuffle && RHSOp0Width == LHSWidth) {
    newRHS = RHSOp0;
  }
  // case 4
  if (LHSOp0 == RHSOp0) {
    newLHS = LHSOp0;
    newRHS = nullptr;
  }

  if (newLHS == LHS && newRHS == RHS)
    return MadeChange ? &SVI : nullptr;

  SmallVector<int, 16> LHSMask;
  SmallVector<int, 16> RHSMask;
  if (newLHS != LHS)
    LHSMask = LHSShuffle->getShuffleMask();
  if (RHSShuffle && newRHS != RHS)
    RHSMask = RHSShuffle->getShuffleMask();

  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  SmallVector<int, 16> newMask;
  bool isSplat = true;
  int SplatElt = -1;
  // Create a new mask for the new ShuffleVectorInst so that the new
  // ShuffleVectorInst is equivalent to the original one.
  for (unsigned i = 0; i < VWidth; ++i) {
    int eltMask;
    if (Mask[i] < 0) {
      // This element is an undef value.
      eltMask = -1;
    } else if (Mask[i] < (int)LHSWidth) {
      // This element is from left hand side vector operand.
      //
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
      // new mask value for the element.
      if (newLHS != LHS) {
        eltMask = LHSMask[Mask[i]];
        // If the value selected is an undef value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
          eltMask = -1;
      } else
        eltMask = Mask[i];
    } else {
      // This element is from right hand side vector operand
      //
      // If the value selected is an undef value, explicitly specify it
      // with a -1 mask value. (case 1)
      if (isa<UndefValue>(RHS))
        eltMask = -1;
      // If RHS is going to be replaced (case 3 or 4), calculate the
      // new mask value for the element.
      else if (newRHS != RHS) {
        eltMask = RHSMask[Mask[i]-LHSWidth];
        // If the value selected is an undef value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)RHSOp0Width) {
          assert(isa<UndefValue>(RHSShuffle->getOperand(1))
                 && "should have been check above");
          eltMask = -1;
        }
      } else
        eltMask = Mask[i]-LHSWidth;

      // If LHS's width is changed, shift the mask value accordingly.
      // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
      // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
      // obfuscation across the two vectors.
      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
        eltMask += newLHSWidth;
    }

    // Check if this could still be a splat.
    if (eltMask >= 0) {
      if (SplatElt >= 0 && SplatElt != eltMask)
        isSplat = false;
      SplatElt = eltMask;
    }

    newMask.push_back(eltMask);
  }

  // If the result mask is equal to one of the original shuffle masks,
  // or is a splat, do the replacement.
  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
      if (newMask[i] < 0) {
        Elts.push_back(UndefValue::get(Int32Ty));
      } else {
        Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
      }
    }
    if (!newRHS)
      newRHS = UndefValue::get(newLHS->getType());
    return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
  }

  // If the result mask is an identity, replace uses of this instruction with
  // corresponding argument.
  bool isLHSID, isRHSID;
  recognizeIdentityMask(newMask, isLHSID, isRHSID);
  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);

  return MadeChange ? &SVI : nullptr;
}