unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { if (!VTTI) return -1; switch (I->getOpcode()) { case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { return VTTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { ExtractElementInst * EEI = cast<ExtractElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return VTTI->getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return VTTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } default: // We don't have any information on this instruction. return -1; } }
/// If we have insertion into a vector that is wider than the vector that we /// are extracting from, try to widen the source vector to allow a single /// shufflevector to replace one or more insert/extract pairs. static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombiner &IC) { VectorType *InsVecType = InsElt->getType(); VectorType *ExtVecType = ExtElt->getVectorOperandType(); unsigned NumInsElts = InsVecType->getVectorNumElements(); unsigned NumExtElts = ExtVecType->getVectorNumElements(); // The inserted-to vector must be wider than the extracted-from vector. if (InsVecType->getElementType() != ExtVecType->getElementType() || NumExtElts >= NumInsElts) return; // Create a shuffle mask to widen the extended-from vector using undefined // values. The mask selects all of the values of the original vector followed // by as many undefined values as needed to create a vector of the same length // as the inserted-to vector. SmallVector<Constant *, 16> ExtendMask; IntegerType *IntType = Type::getInt32Ty(InsElt->getContext()); for (unsigned i = 0; i < NumExtElts; ++i) ExtendMask.push_back(ConstantInt::get(IntType, i)); for (unsigned i = NumExtElts; i < NumInsElts; ++i) ExtendMask.push_back(UndefValue::get(IntType)); Value *ExtVecOp = ExtElt->getVectorOperand(); auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) ? ExtVecOpInst->getParent() : ExtElt->getParent(); // TODO: This restriction matches the basic block check below when creating // new extractelement instructions. If that limitation is removed, this one // could also be removed. But for now, we just bail out to ensure that we // will replace the extractelement instruction that is feeding our // insertelement instruction. This allows the insertelement to then be // replaced by a shufflevector. If the insertelement is not replaced, we can // induce infinite looping because there's an optimization for extractelement // that will delete our widening shuffle. This would trigger another attempt // here to create that shuffle, and we spin forever. if (InsertionBlock != InsElt->getParent()) return; auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ConstantVector::get(ExtendMask)); // Insert the new shuffle after the vector operand of the extract is defined // (as long as it's not a PHI) or at the start of the basic block of the // extract, so any subsequent extracts in the same basic block can use it. // TODO: Insert before the earliest ExtractElementInst that is replaced. if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); else IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); // Replace extracts from the original narrow vector with extracts from the new // wide vector. for (User *U : ExtVecOp->users()) { ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); NewExt->insertAfter(WideVec); IC.replaceInstUsesWith(*OldExt, NewExt); } }