/// \brief Check whether a GEP's indices are all constant. /// /// Respects any simplified values known during the analysis of this callsite. bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) return false; return true; }
// Returns a clone of `I` with its operands converted to those specified in // ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an // operand whose address space needs to be modified might not exist in // ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and // adds that operand use to UndefUsesToFix so that caller can fix them later. // // Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast // from a pointer whose type already matches. Therefore, this function returns a // Value* instead of an Instruction*. static Value *cloneInstructionWithNewAddressSpace( Instruction *I, unsigned NewAddrSpace, const ValueToValueMapTy &ValueWithNewAddrSpace, SmallVectorImpl<const Use *> *UndefUsesToFix) { Type *NewPtrType = I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace); if (I->getOpcode() == Instruction::AddrSpaceCast) { Value *Src = I->getOperand(0); // Because `I` is flat, the source address space must be specific. // Therefore, the inferred address space must be the source space, according // to our algorithm. assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); if (Src->getType() != NewPtrType) return new BitCastInst(Src, NewPtrType); return Src; } // Computes the converted pointer operands. SmallVector<Value *, 4> NewPointerOperands; for (const Use &OperandUse : I->operands()) { if (!OperandUse.get()->getType()->isPointerTy()) NewPointerOperands.push_back(nullptr); else NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef( OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix)); } switch (I->getOpcode()) { case Instruction::BitCast: return new BitCastInst(NewPointerOperands[0], NewPtrType); case Instruction::PHI: { assert(I->getType()->isPointerTy()); PHINode *PHI = cast<PHINode>(I); PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues()); for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) { unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index); NewPHI->addIncoming(NewPointerOperands[OperandNo], PHI->getIncomingBlock(Index)); } return NewPHI; } case Instruction::GetElementPtr: { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); GetElementPtrInst *NewGEP = GetElementPtrInst::Create( GEP->getSourceElementType(), NewPointerOperands[0], SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end())); NewGEP->setIsInBounds(GEP->isInBounds()); return NewGEP; } case Instruction::Select: { assert(I->getType()->isPointerTy()); return SelectInst::Create(I->getOperand(0), NewPointerOperands[1], NewPointerOperands[2], "", nullptr, I); } default: llvm_unreachable("Unexpected opcode"); } }
bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) { GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I); bool Res = false; if (GepInst == nullptr || GepInst->getNumIndices() == 1) { ++NumIgnoredGEPs; return false; } Type *SourceTy = GepInst->getSourceElementType(); StructType *StructTy = nullptr; ConstantInt *Idx; // Check if GEP calculates address from a struct array. if (isa<StructType>(SourceTy)) { StructTy = cast<StructType>(SourceTy); Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1)); if ((Idx == nullptr || Idx->getSExtValue() != 0) && !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0) Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy)); } // Iterate all (except the first and the last) idx within each GEP instruction // for possible nested struct field address calculation. for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) { SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(), GepInst->idx_begin() + i); Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec); unsigned CounterIdx = 0; if (isa<ArrayType>(Ty)) { ArrayType *ArrayTy = cast<ArrayType>(Ty); StructTy = dyn_cast<StructType>(ArrayTy->getElementType()); if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0) continue; // The last counter for struct array access. CounterIdx = getArrayCounterIdx(StructTy); } else if (isa<StructType>(Ty)) { StructTy = cast<StructType>(Ty); if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0) continue; // Get the StructTy's subfield index. Idx = cast<ConstantInt>(GepInst->getOperand(i+1)); assert(Idx->getSExtValue() >= 0 && Idx->getSExtValue() < StructTy->getNumElements()); CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue(); } Res |= insertCounterUpdate(I, StructTy, CounterIdx); } if (Res) ++NumInstrumentedGEPs; else ++NumIgnoredGEPs; return Res; }
void ArrayIndexChecker::visitGetElementPtrInst(GetElementPtrInst& I) { DEBUG(dbgs() << "ArrayIndexChecker: visiting GEP " << I << "\n"); visitValue(*I.getPointerOperand()); for (auto Idx = I.idx_begin(), E = I.idx_end(); Idx != E; ++Idx) { visitValue(**Idx); } auto pos = std::find(ptr_value_vec_.begin(), ptr_value_vec_.end(), &I); assert(pos != ptr_value_vec_.end()); index_t varIdx = pos - ptr_value_vec_.begin(); assert(idx2addr_.find(varIdx) != idx2addr_.end()); if (addr2version_[idx2addr_[varIdx]] != 0) throw ArrayIndexIsNotConstant;; DEBUG(dbgs() << "ArrayIndexChecker: visited GEP\n"); }
// // Method: runOnModule() // // Description: // Entry point for this LLVM pass. // Find all GEPs, and simplify them. // // Inputs: // M - A reference to the LLVM module to transform // // Outputs: // M - The transformed LLVM module. // // Return value: // true - The module was modified. // false - The module was not modified. // bool SimplifyGEP::runOnModule(Module& M) { TD = &getAnalysis<TargetData>(); preprocess(M); for (Module::iterator F = M.begin(); F != M.end(); ++F){ for (Function::iterator B = F->begin(), FE = F->end(); B != FE; ++B) { for (BasicBlock::iterator I = B->begin(), BE = B->end(); I != BE; I++) { if(!(isa<GetElementPtrInst>(I))) continue; GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); Value *PtrOp = GEP->getOperand(0); Value *StrippedPtr = PtrOp->stripPointerCasts(); // Check if the GEP base pointer is enclosed in a cast if (StrippedPtr != PtrOp) { const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast<ConstantInt>(GEP->getOperand(1))) HasZeroPointerIndex = C->isZero(); // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); if (const ArrayType *CATy = dyn_cast<ArrayType>(CPTy->getElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP->idx_begin()+1, GEP->idx_end()); GetElementPtrInst *Res = GetElementPtrInst::Create(StrippedPtr, Idx, GEP->getName(), GEP); Res->setIsInBounds(GEP->isInBounds()); GEP->replaceAllUsesWith(Res); continue; } if (const ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... // At this point, we know that the cast source type is a pointer // to an array of the same type as the destination pointer // array. Because the array type is never stepped over (there // is a leading zero) we can fold the cast into this GEP. GEP->setOperand(0, StrippedPtr); continue; } } } } else if (GEP->getNumOperands() == 2) { // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP->getContext())); Idx[1] = GEP->getOperand(1); Value *NewGEP = GetElementPtrInst::Create(StrippedPtr, Idx, GEP->getName(), GEP); // V and GEP are both pointer types --> BitCast GEP->replaceAllUsesWith(new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP)); continue; } // Transform things like: // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We // allow either a mul, shift, or constant here. Value *NewIdx = 0; ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP->getOperand(1); Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(1))) { NewIdx = ConstantInt::get(CI->getType(), 1); Scale = CI; } else if (Instruction *Inst =dyn_cast<Instruction>(GEP->getOperand(1))){ if (Inst->getOpcode() == Instruction::Shl && isa<ConstantInt>(Inst->getOperand(1))) { ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && isa<ConstantInt>(Inst->getOperand(1))) { Scale = cast<ConstantInt>(Inst->getOperand(1)); NewIdx = Inst->getOperand(0); } } // If the index will be to exactly the right offset with the scale taken // out, perform the transformation. Note, we don't know whether Scale is // signed or not. We'll use unsigned version of division/modulo // operation after making sure Scale doesn't have the sign bit set. if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && Scale->getZExtValue() % ArrayEltSize == 0) { Scale = ConstantInt::get(Scale->getType(), Scale->getZExtValue() / ArrayEltSize); if (Scale->getZExtValue() != 1) { Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); NewIdx = BinaryOperator::Create(BinaryOperator::Mul, NewIdx, C, "idxscale"); } // Insert the new GEP instruction. Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP->getContext())); Idx[1] = NewIdx; Value *NewGEP = GetElementPtrInst::Create(StrippedPtr, Idx, GEP->getName(), GEP); GEP->replaceAllUsesWith(new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP)); continue; } } } } } } } return true; }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. CallGraphNode *ArgPromotion::DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); std::vector<const Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. // std::map<Argument*, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. std::map<IndicesVector, LoadInst*> OriginalLoads; // Attributes - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeWithIndex, 8> AttributesVec; const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // First, determine the new argument list unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); IndicesVector Indices; Indices.reserve(User->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(Indices); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(User)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(User->use_back()); OriginalLoads[Indices] = OrigLoad; } // Add a parameter to the function for each element passed in. for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), SI->begin(), SI->end())); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } // Add any function attributes. if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); const Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Get the alias analysis information that we need to update to reflect our // changes. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value*, 16> Args; while (!F->use_empty()) { CallSite CS = CallSite::get(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr &CallPAL = CS.getAttributes(); // Add any return attributes. if (Attributes attrs = CallPAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); const Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing. ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } } if (ExtraArgHack) Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Add any function attributes. if (Attributes attrs = CallPAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } Args.clear(); AttributesVec.clear(); // Update the alias analysis implementation to know that we are replacing // the old call with a new one. AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); New->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transfering uses of the old arguments over to // the new arguments, also transfering over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(I2); I2->takeName(I); AA.replaceWithNewValue(I, I2); ++I2; continue; } if (ByValArgsToTransform.count(I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(I); AA.replaceWithNewValue(I, TheAlloca); continue; } if (I->use_empty()) { AA.deleteValue(I); continue; } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { assert(ArgIndices.begin()->empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); *It != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->use_back()); L->replaceAllUsesWith(TheArg); AA.replaceWithNewValue(L, TheArg); L->eraseFromParent(); } AA.deleteValue(GEP); GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) ++I2; } // Notify the alias analysis implementation that we inserted a new argument. if (ExtraArgHack) AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it. If there is a dangling // reference to the CallgraphNode, just leave the dead function around for // someone else to nuke. CallGraphNode *CGN = CG[F]; if (CGN->getNumReferences() == 0) delete CG.removeFunctionFromModule(CGN); else F->setLinkage(Function::ExternalLinkage); return NF_CGN; }
static void ConvertOperandToType(User *U, Value *OldVal, Value *NewVal, ValueMapCache &VMC, const TargetData &TD) { if (isa<ValueHandle>(U)) return; // Valuehandles don't let go of operands... if (VMC.OperandsMapped.count(U)) return; VMC.OperandsMapped.insert(U); ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(U); if (VMCI != VMC.ExprMap.end()) return; Instruction *I = cast<Instruction>(U); // Only Instructions convertible BasicBlock *BB = I->getParent(); assert(BB != 0 && "Instruction not embedded in basic block!"); std::string Name = I->getName(); I->setName(""); Instruction *Res; // Result of conversion //std::cerr << endl << endl << "Type:\t" << Ty << "\nInst: " << I // << "BB Before: " << BB << endl; // Prevent I from being removed... ValueHandle IHandle(VMC, I); const Type *NewTy = NewVal->getType(); Constant *Dummy = (NewTy != Type::VoidTy) ? Constant::getNullValue(NewTy) : 0; switch (I->getOpcode()) { case Instruction::Cast: if (VMC.NewCasts.count(ValueHandle(VMC, I))) { // This cast has already had it's value converted, causing a new cast to // be created. We don't want to create YET ANOTHER cast instruction // representing the original one, so just modify the operand of this cast // instruction, which we know is newly created. I->setOperand(0, NewVal); I->setName(Name); // give I its name back return; } else { Res = new CastInst(NewVal, I->getType(), Name); } break; case Instruction::Add: if (isa<PointerType>(NewTy)) { Value *IndexVal = I->getOperand(OldVal == I->getOperand(0) ? 1 : 0); std::vector<Value*> Indices; BasicBlock::iterator It = I; if (const Type *ETy = ConvertibleToGEP(NewTy, IndexVal, Indices, TD,&It)){ // If successful, convert the add to a GEP //const Type *RetTy = PointerType::get(ETy); // First operand is actually the given pointer... Res = new GetElementPtrInst(NewVal, Indices, Name); assert(cast<PointerType>(Res->getType())->getElementType() == ETy && "ConvertibleToGEP broken!"); break; } } // FALLTHROUGH case Instruction::Sub: case Instruction::SetEQ: case Instruction::SetNE: { Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(), Dummy, Dummy, Name); VMC.ExprMap[I] = Res; // Add node to expression eagerly unsigned OtherIdx = (OldVal == I->getOperand(0)) ? 1 : 0; Value *OtherOp = I->getOperand(OtherIdx); Res->setOperand(!OtherIdx, NewVal); Value *NewOther = ConvertExpressionToType(OtherOp, NewTy, VMC, TD); Res->setOperand(OtherIdx, NewOther); break; } case Instruction::Shl: case Instruction::Shr: assert(I->getOperand(0) == OldVal); Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(), NewVal, I->getOperand(1), Name); break; case Instruction::Free: // Free can free any pointer type! assert(I->getOperand(0) == OldVal); Res = new FreeInst(NewVal); break; case Instruction::Load: { assert(I->getOperand(0) == OldVal && isa<PointerType>(NewVal->getType())); const Type *LoadedTy = cast<PointerType>(NewVal->getType())->getElementType(); Value *Src = NewVal; if (const CompositeType *CT = dyn_cast<CompositeType>(LoadedTy)) { std::vector<Value*> Indices; Indices.push_back(Constant::getNullValue(Type::UIntTy)); unsigned Offset = 0; // No offset, get first leaf. LoadedTy = getStructOffsetType(CT, Offset, Indices, TD, false); assert(LoadedTy->isFirstClassType()); if (Indices.size() != 1) { // Do not generate load X, 0 // Insert the GEP instruction before this load. Src = new GetElementPtrInst(Src, Indices, Name+".idx", I); } } Res = new LoadInst(Src, Name); assert(Res->getType()->isFirstClassType() && "Load of structure or array!"); break; } case Instruction::Store: { if (I->getOperand(0) == OldVal) { // Replace the source value // Check to see if operand #1 has already been converted... ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(I->getOperand(1)); if (VMCI != VMC.ExprMap.end()) { // Comments describing this stuff are in the OperandConvertibleToType // switch statement for Store... // const Type *ElTy = cast<PointerType>(VMCI->second->getType())->getElementType(); Value *SrcPtr = VMCI->second; if (ElTy != NewTy) { // We check that this is a struct in the initial scan... const StructType *SElTy = cast<StructType>(ElTy); std::vector<Value*> Indices; Indices.push_back(Constant::getNullValue(Type::UIntTy)); unsigned Offset = 0; const Type *Ty = getStructOffsetType(ElTy, Offset, Indices, TD,false); assert(Offset == 0 && "Offset changed!"); assert(NewTy == Ty && "Did not convert to correct type!"); // Insert the GEP instruction before this store. SrcPtr = new GetElementPtrInst(SrcPtr, Indices, SrcPtr->getName()+".idx", I); } Res = new StoreInst(NewVal, SrcPtr); VMC.ExprMap[I] = Res; } else { // Otherwise, we haven't converted Operand #1 over yet... const PointerType *NewPT = PointerType::get(NewTy); Res = new StoreInst(NewVal, Constant::getNullValue(NewPT)); VMC.ExprMap[I] = Res; Res->setOperand(1, ConvertExpressionToType(I->getOperand(1), NewPT, VMC, TD)); } } else { // Replace the source pointer const Type *ValTy = cast<PointerType>(NewTy)->getElementType(); Value *SrcPtr = NewVal; if (isa<StructType>(ValTy)) { std::vector<Value*> Indices; Indices.push_back(Constant::getNullValue(Type::UIntTy)); unsigned Offset = 0; ValTy = getStructOffsetType(ValTy, Offset, Indices, TD, false); assert(Offset == 0 && ValTy); // Insert the GEP instruction before this store. SrcPtr = new GetElementPtrInst(SrcPtr, Indices, SrcPtr->getName()+".idx", I); } Res = new StoreInst(Constant::getNullValue(ValTy), SrcPtr); VMC.ExprMap[I] = Res; Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), ValTy, VMC, TD)); } break; } case Instruction::GetElementPtr: { // Convert a one index getelementptr into just about anything that is // desired. // BasicBlock::iterator It = I; const Type *OldElTy = cast<PointerType>(I->getType())->getElementType(); unsigned DataSize = TD.getTypeSize(OldElTy); Value *Index = I->getOperand(1); if (DataSize != 1) { // Insert a multiply of the old element type is not a unit size... Value *CST; if (Index->getType()->isSigned()) CST = ConstantSInt::get(Index->getType(), DataSize); else CST = ConstantUInt::get(Index->getType(), DataSize); Index = BinaryOperator::create(Instruction::Mul, Index, CST, "scale", It); } // Perform the conversion now... // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(NewVal->getType(),Index,Indices,TD,&It); assert(ElTy != 0 && "GEP Conversion Failure!"); Res = new GetElementPtrInst(NewVal, Indices, Name); assert(Res->getType() == PointerType::get(ElTy) && "ConvertibleToGet failed!"); } #if 0 if (I->getType() == PointerType::get(Type::SByteTy)) { // Convert a getelementptr sbyte * %reg111, uint 16 freely back to // anything that is a pointer type... // BasicBlock::iterator It = I; // Check to see if the second argument is an expression that can // be converted to the appropriate size... if so, allow it. // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(NewVal->getType(), I->getOperand(1), Indices, TD, &It); assert(ElTy != 0 && "GEP Conversion Failure!"); Res = new GetElementPtrInst(NewVal, Indices, Name); } else { // Convert a getelementptr ulong * %reg123, uint %N // to getelementptr long * %reg123, uint %N // ... where the type must simply stay the same size... // GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end()); Res = new GetElementPtrInst(NewVal, Indices, Name); } #endif break; case Instruction::PHI: { PHINode *OldPN = cast<PHINode>(I); PHINode *NewPN = new PHINode(NewTy, Name); VMC.ExprMap[I] = NewPN; while (OldPN->getNumOperands()) { BasicBlock *BB = OldPN->getIncomingBlock(0); Value *OldVal = OldPN->getIncomingValue(0); ValueHandle OldValHandle(VMC, OldVal); OldPN->removeIncomingValue(BB, false); Value *V = ConvertExpressionToType(OldVal, NewTy, VMC, TD); NewPN->addIncoming(V, BB); } Res = NewPN; break; } case Instruction::Call: { Value *Meth = I->getOperand(0); std::vector<Value*> Params(I->op_begin()+1, I->op_end()); if (Meth == OldVal) { // Changing the function pointer? const PointerType *NewPTy = cast<PointerType>(NewVal->getType()); const FunctionType *NewTy = cast<FunctionType>(NewPTy->getElementType()); if (NewTy->getReturnType() == Type::VoidTy) Name = ""; // Make sure not to name a void call! // Get an iterator to the call instruction so that we can insert casts for // operands if need be. Note that we do not require operands to be // convertible, we can insert casts if they are convertible but not // compatible. The reason for this is that we prefer to have resolved // functions but casted arguments if possible. // BasicBlock::iterator It = I; // Convert over all of the call operands to their new types... but only // convert over the part that is not in the vararg section of the call. // for (unsigned i = 0; i != NewTy->getNumParams(); ++i) if (Params[i]->getType() != NewTy->getParamType(i)) { // Create a cast to convert it to the right type, we know that this // is a lossless cast... // Params[i] = new CastInst(Params[i], NewTy->getParamType(i), "callarg.cast." + Params[i]->getName(), It); } Meth = NewVal; // Update call destination to new value } else { // Changing an argument, must be in vararg area std::vector<Value*>::iterator OI = find(Params.begin(), Params.end(), OldVal); assert (OI != Params.end() && "Not using value!"); *OI = NewVal; } Res = new CallInst(Meth, Params, Name); break; } default: assert(0 && "Expression convertible, but don't know how to convert?"); return; } // If the instruction was newly created, insert it into the instruction // stream. // BasicBlock::iterator It = I; assert(It != BB->end() && "Instruction not in own basic block??"); BB->getInstList().insert(It, Res); // Keep It pointing to old instruction DEBUG(std::cerr << "COT CREATED: " << (void*)Res << " " << *Res << "In: " << (void*)I << " " << *I << "Out: " << (void*)Res << " " << *Res); // Add the instruction to the expression map VMC.ExprMap[I] = Res; if (I->getType() != Res->getType()) ConvertValueToNewType(I, Res, VMC, TD); else { bool FromStart = true; Value::use_iterator UI; while (1) { if (FromStart) UI = I->use_begin(); if (UI == I->use_end()) break; if (isa<ValueHandle>(*UI)) { ++UI; FromStart = false; } else { User *U = *UI; if (!FromStart) --UI; U->replaceUsesOfWith(I, Res); if (!FromStart) ++UI; } } } }
Value *llvm::ConvertExpressionToType(Value *V, const Type *Ty, ValueMapCache &VMC, const TargetData &TD) { if (V->getType() == Ty) return V; // Already where we need to be? ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(V); if (VMCI != VMC.ExprMap.end()) { const Value *GV = VMCI->second; const Type *GTy = VMCI->second->getType(); assert(VMCI->second->getType() == Ty); if (Instruction *I = dyn_cast<Instruction>(V)) ValueHandle IHandle(VMC, I); // Remove I if it is unused now! return VMCI->second; } DEBUG(std::cerr << "CETT: " << (void*)V << " " << *V); Instruction *I = dyn_cast<Instruction>(V); if (I == 0) { Constant *CPV = cast<Constant>(V); // Constants are converted by constant folding the cast that is required. // We assume here that all casts are implemented for constant prop. Value *Result = ConstantExpr::getCast(CPV, Ty); // Add the instruction to the expression map //VMC.ExprMap[V] = Result; return Result; } BasicBlock *BB = I->getParent(); std::string Name = I->getName(); if (!Name.empty()) I->setName(""); Instruction *Res; // Result of conversion ValueHandle IHandle(VMC, I); // Prevent I from being removed! Constant *Dummy = Constant::getNullValue(Ty); switch (I->getOpcode()) { case Instruction::Cast: assert(VMC.NewCasts.count(ValueHandle(VMC, I)) == 0); Res = new CastInst(I->getOperand(0), Ty, Name); VMC.NewCasts.insert(ValueHandle(VMC, Res)); break; case Instruction::Add: case Instruction::Sub: Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(), Dummy, Dummy, Name); VMC.ExprMap[I] = Res; // Add node to expression eagerly Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), Ty, VMC, TD)); Res->setOperand(1, ConvertExpressionToType(I->getOperand(1), Ty, VMC, TD)); break; case Instruction::Shl: case Instruction::Shr: Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(), Dummy, I->getOperand(1), Name); VMC.ExprMap[I] = Res; Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), Ty, VMC, TD)); break; case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); Res = new LoadInst(Constant::getNullValue(PointerType::get(Ty)), Name); VMC.ExprMap[I] = Res; Res->setOperand(0, ConvertExpressionToType(LI->getPointerOperand(), PointerType::get(Ty), VMC, TD)); assert(Res->getOperand(0)->getType() == PointerType::get(Ty)); assert(Ty == Res->getType()); assert(Res->getType()->isFirstClassType() && "Load of structure or array!"); break; } case Instruction::PHI: { PHINode *OldPN = cast<PHINode>(I); PHINode *NewPN = new PHINode(Ty, Name); VMC.ExprMap[I] = NewPN; // Add node to expression eagerly while (OldPN->getNumOperands()) { BasicBlock *BB = OldPN->getIncomingBlock(0); Value *OldVal = OldPN->getIncomingValue(0); ValueHandle OldValHandle(VMC, OldVal); OldPN->removeIncomingValue(BB, false); Value *V = ConvertExpressionToType(OldVal, Ty, VMC, TD); NewPN->addIncoming(V, BB); } Res = NewPN; break; } case Instruction::Malloc: { Res = ConvertMallocToType(cast<MallocInst>(I), Ty, Name, VMC, TD); break; } case Instruction::GetElementPtr: { // GetElementPtr's are directly convertible to a pointer type if they have // a number of zeros at the end. Because removing these values does not // change the logical offset of the GEP, it is okay and fair to remove them. // This can change this: // %t1 = getelementptr %Hosp * %hosp, ubyte 4, ubyte 0 ; <%List **> // %t2 = cast %List * * %t1 to %List * // into // %t2 = getelementptr %Hosp * %hosp, ubyte 4 ; <%List *> // GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); // Check to see if there are zero elements that we can remove from the // index array. If there are, check to see if removing them causes us to // get to the right type... // std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end()); const Type *BaseType = GEP->getPointerOperand()->getType(); const Type *PVTy = cast<PointerType>(Ty)->getElementType(); Res = 0; while (!Indices.empty() && Indices.back() == Constant::getNullValue(Indices.back()->getType())){ Indices.pop_back(); if (GetElementPtrInst::getIndexedType(BaseType, Indices, true) == PVTy) { if (Indices.size() == 0) Res = new CastInst(GEP->getPointerOperand(), BaseType); // NOOP CAST else Res = new GetElementPtrInst(GEP->getPointerOperand(), Indices, Name); break; } } if (Res == 0 && GEP->getNumOperands() == 2 && GEP->getType() == PointerType::get(Type::SByteTy)) { // Otherwise, we can convert a GEP from one form to the other iff the // current gep is of the form 'getelementptr sbyte*, unsigned N // and we could convert this to an appropriate GEP for the new type. // const PointerType *NewSrcTy = PointerType::get(PVTy); BasicBlock::iterator It = I; // Check to see if 'N' is an expression that can be converted to // the appropriate size... if so, allow it. // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(NewSrcTy, I->getOperand(1), Indices, TD, &It); if (ElTy) { assert(ElTy == PVTy && "Internal error, setup wrong!"); Res = new GetElementPtrInst(Constant::getNullValue(NewSrcTy), Indices, Name); VMC.ExprMap[I] = Res; Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), NewSrcTy, VMC, TD)); } } // Otherwise, it could be that we have something like this: // getelementptr [[sbyte] *] * %reg115, uint %reg138 ; [sbyte]** // and want to convert it into something like this: // getelemenptr [[int] *] * %reg115, uint %reg138 ; [int]** // if (Res == 0) { const PointerType *NewSrcTy = PointerType::get(PVTy); std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end()); Res = new GetElementPtrInst(Constant::getNullValue(NewSrcTy), Indices, Name); VMC.ExprMap[I] = Res; Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), NewSrcTy, VMC, TD)); } assert(Res && "Didn't find match!"); break; } case Instruction::Call: { assert(!isa<Function>(I->getOperand(0))); // If this is a function pointer, we can convert the return type if we can // convert the source function pointer. // const PointerType *PT = cast<PointerType>(I->getOperand(0)->getType()); const FunctionType *FT = cast<FunctionType>(PT->getElementType()); std::vector<const Type *> ArgTys(FT->param_begin(), FT->param_end()); const FunctionType *NewTy = FunctionType::get(Ty, ArgTys, FT->isVarArg()); const PointerType *NewPTy = PointerType::get(NewTy); if (Ty == Type::VoidTy) Name = ""; // Make sure not to name calls that now return void! Res = new CallInst(Constant::getNullValue(NewPTy), std::vector<Value*>(I->op_begin()+1, I->op_end()), Name); VMC.ExprMap[I] = Res; Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),NewPTy,VMC,TD)); break; } default: assert(0 && "Expression convertible, but don't know how to convert?"); return 0; } assert(Res->getType() == Ty && "Didn't convert expr to correct type!"); BB->getInstList().insert(I, Res); // Add the instruction to the expression map VMC.ExprMap[I] = Res; unsigned NumUses = I->use_size(); for (unsigned It = 0; It < NumUses; ) { unsigned OldSize = NumUses; Value::use_iterator UI = I->use_begin(); std::advance(UI, It); ConvertOperandToType(*UI, I, Res, VMC, TD); NumUses = I->use_size(); if (NumUses == OldSize) ++It; } DEBUG(std::cerr << "ExpIn: " << (void*)I << " " << *I << "ExpOut: " << (void*)Res << " " << *Res); return Res; }
// ExpressionConvertibleToType - Return true if it is possible bool llvm::ExpressionConvertibleToType(Value *V, const Type *Ty, ValueTypeCache &CTMap, const TargetData &TD) { // Expression type must be holdable in a register. if (!Ty->isFirstClassType()) return false; ValueTypeCache::iterator CTMI = CTMap.find(V); if (CTMI != CTMap.end()) return CTMI->second == Ty; // If it's a constant... all constants can be converted to a different // type. // if (isa<Constant>(V) && !isa<GlobalValue>(V)) return true; CTMap[V] = Ty; if (V->getType() == Ty) return true; // Expression already correct type! Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; // Otherwise, we can't convert! switch (I->getOpcode()) { case Instruction::Cast: // We can convert the expr if the cast destination type is losslessly // convertible to the requested type. if (!Ty->isLosslesslyConvertibleTo(I->getType())) return false; // We also do not allow conversion of a cast that casts from a ptr to array // of X to a *X. For example: cast [4 x %List *] * %val to %List * * // if (const PointerType *SPT = dyn_cast<PointerType>(I->getOperand(0)->getType())) if (const PointerType *DPT = dyn_cast<PointerType>(I->getType())) if (const ArrayType *AT = dyn_cast<ArrayType>(SPT->getElementType())) if (AT->getElementType() == DPT->getElementType()) return false; break; case Instruction::Add: case Instruction::Sub: if (!Ty->isInteger() && !Ty->isFloatingPoint()) return false; if (!ExpressionConvertibleToType(I->getOperand(0), Ty, CTMap, TD) || !ExpressionConvertibleToType(I->getOperand(1), Ty, CTMap, TD)) return false; break; case Instruction::Shr: if (!Ty->isInteger()) return false; if (Ty->isSigned() != V->getType()->isSigned()) return false; // FALL THROUGH case Instruction::Shl: if (!Ty->isInteger()) return false; if (!ExpressionConvertibleToType(I->getOperand(0), Ty, CTMap, TD)) return false; break; case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); if (!ExpressionConvertibleToType(LI->getPointerOperand(), PointerType::get(Ty), CTMap, TD)) return false; break; } case Instruction::PHI: { PHINode *PN = cast<PHINode>(I); // Be conservative if we find a giant PHI node. if (PN->getNumIncomingValues() > 32) return false; for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) if (!ExpressionConvertibleToType(PN->getIncomingValue(i), Ty, CTMap, TD)) return false; break; } case Instruction::Malloc: if (!MallocConvertibleToType(cast<MallocInst>(I), Ty, CTMap, TD)) return false; break; case Instruction::GetElementPtr: { // GetElementPtr's are directly convertible to a pointer type if they have // a number of zeros at the end. Because removing these values does not // change the logical offset of the GEP, it is okay and fair to remove them. // This can change this: // %t1 = getelementptr %Hosp * %hosp, ubyte 4, ubyte 0 ; <%List **> // %t2 = cast %List * * %t1 to %List * // into // %t2 = getelementptr %Hosp * %hosp, ubyte 4 ; <%List *> // GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); const PointerType *PTy = dyn_cast<PointerType>(Ty); if (!PTy) return false; // GEP must always return a pointer... const Type *PVTy = PTy->getElementType(); // Check to see if there are zero elements that we can remove from the // index array. If there are, check to see if removing them causes us to // get to the right type... // std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end()); const Type *BaseType = GEP->getPointerOperand()->getType(); const Type *ElTy = 0; while (!Indices.empty() && Indices.back() == Constant::getNullValue(Indices.back()->getType())){ Indices.pop_back(); ElTy = GetElementPtrInst::getIndexedType(BaseType, Indices, true); if (ElTy == PVTy) break; // Found a match!! ElTy = 0; } if (ElTy) break; // Found a number of zeros we can strip off! // Otherwise, we can convert a GEP from one form to the other iff the // current gep is of the form 'getelementptr sbyte*, long N // and we could convert this to an appropriate GEP for the new type. // if (GEP->getNumOperands() == 2 && GEP->getType() == PointerType::get(Type::SByteTy)) { // Do not Check to see if our incoming pointer can be converted // to be a ptr to an array of the right type... because in more cases than // not, it is simply not analyzable because of pointer/array // discrepancies. To fix this, we will insert a cast before the GEP. // // Check to see if 'N' is an expression that can be converted to // the appropriate size... if so, allow it. // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(PTy, I->getOperand(1), Indices, TD); if (ElTy == PVTy) { if (!ExpressionConvertibleToType(I->getOperand(0), PointerType::get(ElTy), CTMap, TD)) return false; // Can't continue, ExConToTy might have polluted set! break; } } // Otherwise, it could be that we have something like this: // getelementptr [[sbyte] *] * %reg115, long %reg138 ; [sbyte]** // and want to convert it into something like this: // getelemenptr [[int] *] * %reg115, long %reg138 ; [int]** // if (GEP->getNumOperands() == 2 && PTy->getElementType()->isSized() && TD.getTypeSize(PTy->getElementType()) == TD.getTypeSize(GEP->getType()->getElementType())) { const PointerType *NewSrcTy = PointerType::get(PVTy); if (!ExpressionConvertibleToType(I->getOperand(0), NewSrcTy, CTMap, TD)) return false; break; } return false; // No match, maybe next time. } case Instruction::Call: { if (isa<Function>(I->getOperand(0))) return false; // Don't even try to change direct calls. // If this is a function pointer, we can convert the return type if we can // convert the source function pointer. // const PointerType *PT = cast<PointerType>(I->getOperand(0)->getType()); const FunctionType *FT = cast<FunctionType>(PT->getElementType()); std::vector<const Type *> ArgTys(FT->param_begin(), FT->param_end()); const FunctionType *NewTy = FunctionType::get(Ty, ArgTys, FT->isVarArg()); if (!ExpressionConvertibleToType(I->getOperand(0), PointerType::get(NewTy), CTMap, TD)) return false; break; } default: return false; } // Expressions are only convertible if all of the users of the expression can // have this value converted. This makes use of the map to avoid infinite // recursion. // for (Value::use_iterator It = I->use_begin(), E = I->use_end(); It != E; ++It) if (!OperandConvertibleToType(*It, I, Ty, CTMap, TD)) return false; return true; }
bool Aa::LowerGepPass::runOnFunction(Function &F) { const llvm::Type *ptr_int_type = TD->getIntPtrType(F.getContext()); for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) { BasicBlock *bb = bi; BasicBlock::iterator ii = bb->begin(); while (ii != bb->end()) { GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(ii); BasicBlock::iterator gi = ii++; if (!gep) { continue; } for (llvm::Value::use_iterator ui = gep->use_begin(), ue = gep->use_end(); ui != ue; ++ui) { Use &u = ui.getUse(); IOCode ioc = get_io_code(u); if (ioc == NOT_IO) continue; u.set(CastInst::CreatePointerCast(gep->getPointerOperand() , gep->getType() , "", gep)); } assert(gep->hasIndices() && "GEP without indices??"); llvm::Value *ptr = gep->getPointerOperand(); const Type *ctype = ptr->getType(); // deal with the base pointer first llvm::Value *base = gep->getPointerOperand(); std::string base_name = gep->getNameStr() + ".base"; llvm::Value *address = new PtrToIntInst(base, ptr_int_type, base_name + ".cast", gi); unsigned i = 0; for (User::op_iterator oi = gep->idx_begin(), oe = gep->idx_end(); oi != oe; ++oi, ++i) { llvm::Value *index = *oi; llvm::Value *offset = NULL; std::stringstream index_name; index_name << gep->getNameStr() << ".idx." << i; if (const SequentialType *qtype = dyn_cast<SequentialType>(ctype)) { // multiply index by size of element unsigned element_size = getTypePaddedSize(TD, qtype->getElementType()); const llvm::IntegerType *index_type = cast<IntegerType>(index->getType()); ConstantInt *cint = ConstantInt::get(index_type, element_size); assert(cint && "uh oh!"); offset = BinaryOperator::Create(Instruction::Mul , cint , index , index_name.str() , gi); ctype = qtype->getElementType(); } else if (const StructType *stype = dyn_cast<StructType>(ctype)) { // calculate offset into the struct const StructLayout *layout = TD->getStructLayout(stype); unsigned idx = cast<ConstantInt>(index)->getValue().getZExtValue(); unsigned struct_offset = layout->getElementOffset(idx); offset = ConstantInt::get(ptr_int_type, struct_offset); ctype = stype->getElementType(idx); } else assert(false && "unhandled offset into composite type"); // add offset to the address assert(address && "uh oh!"); std::stringstream add_name; add_name << gep->getNameStr() << ".lvl." << i; if (offset->getType() != address->getType()) { offset = CastInst::CreateIntegerCast(offset, address->getType() , false, offset->getName() + ".resized" , gi); } address = BinaryOperator::Create(Instruction::Add , address, offset , add_name.str(), gi); } if (address->getType() != ptr_int_type) address = CastInst::CreateIntegerCast(address, ptr_int_type , false, address->getName() + ".final", gi); Instruction *new_ptr = new IntToPtrInst(address, gep->getType() , gep->getName() + ".cast"); ReplaceInstWithInst(bb->getInstList(), gi, new_ptr); } } return true; }
// // Methods: insertBadIndexing() // // Description: // This method modifieds GEP indexing expressions so that their indices are // (most likely) below the bounds of the object pointed to by the source // pointer. It does this by modifying the first index to be -1. // // Return value: // true - One or more changes were made to the program. // false - No changes were made to the program. // bool FaultInjector::insertBadIndexing (Function & F) { // Worklist of allocation sites to rewrite std::vector<GetElementPtrInst *> WorkList; // // Find GEP instructions that index into an array. Add these to the // worklist. // for (Function::iterator fI = F.begin(), fE = F.end(); fI != fE; ++fI) { BasicBlock & BB = *fI; for (BasicBlock::iterator I = BB.begin(), bE = BB.end(); I != bE; ++I) { if (GetElementPtrInst * GEP = dyn_cast<GetElementPtrInst>(I)) { // Skip if we should not insert a fault. if (!doFault()) continue; WorkList.push_back (GEP); } } } // Flag whether the program was modified bool modified = (WorkList.size() > 0); // // Iterator through the worklist and transform each GEP. // while (WorkList.size()) { GetElementPtrInst * GEP = WorkList.back(); WorkList.pop_back(); // // Print out where the fault will be inserted in the source code. // printSourceInfo ("Bad indexing", GEP); // The index arguments to the new GEP std::vector<Value *> args; // // Create a copy of the GEP's indices. // User::op_iterator i = GEP->idx_begin(); if (i == GEP->idx_end()) continue; args.push_back (ConstantInt::get (Int32Type, INT_MAX, true)); for (++i; i != GEP->idx_end(); ++i) { args.push_back (*i); } // // Create the new GEP instruction. // Value * Pointer = GEP->getPointerOperand(); Twine name = GEP->getName() + "badindex"; GetElementPtrInst * NewGEP = GetElementPtrInst::Create (Pointer, args.begin(), args.end(), name, GEP); GEP->replaceAllUsesWith (NewGEP); GEP->eraseFromParent(); ++BadIndices; } return modified; }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. std::map<Argument *, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. // We need to keep the original loads for each argument and the elements // of the argument that are accessed. std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeSet, 8> ArgAttrVec; AttributeList PAL = F->getAttributes(); // First, determine the new argument list unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), AttributeSet()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; // There may be remaining metadata uses of the argument for things like // llvm.dbg.value. Replace them with undef. I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; if (LoadInst *L = dyn_cast<LoadInst>(UI)) SrcTy = L->getType(); else SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( cast<PointerType>(I->getType()->getScalarType())->getElementType(), ArgIndex.second)); ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } Type *RetTy = FTy->getReturnType(); // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(F->getSubprogram()); F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), PAL.getRetAttributes(), ArgAttrVec)); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value *, 16> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttributeList &CallPAL = CS.getAttributes(); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value *> Ops; for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); for (auto II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, II)); // Keep track of the type we're currently indexing. if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) ElTy = ElPTy->getElementType(); else ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); } } // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; CS.getOperandBundlesAsDefs(OpBundles); CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles, "", Call); } else { auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); NewCS = NewCall; } NewCS.setCallingConv(CS.getCallingConv()); NewCS.setAttributes( AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), CallPAL.getRetAttributes(), ArgAttrVec)); NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) NewCS->setProfWeight(W); Args.clear(); ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) (*ReplaceCallSite)(CS, NewCS); if (!Call->use_empty()) { Call->replaceAllUsesWith(NewCS.getInstruction()); NewCS->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } const DataLayout &DL = F->getParent()->getDataLayout(); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); ++I2; continue; } if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, I->getParamAlignment(), "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName() + "." + Twine(i)); new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. for (User *U : TheAlloca->users()) { CallInst *Call = dyn_cast<CallInst>(U); if (!Call) continue; Call->setTailCall(false); } continue; } if (I->use_empty()) continue; // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName() + ".val"); LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. std::advance(I2, ArgIndices.size()); } return NF; }