void RTAssociate::ProcessFunctionBody(Function &F, Function &NewF, DSGraph* G, DataStructures* DS) { if (G->node_begin() == G->node_end()) return; // Quick exit if nothing to do. FuncInfo &FI = *getFuncInfo(&F); // Calculate which DSNodes are reachable from globals. If a node is reachable // from a global, we will create a global pool for it, so no argument passage // is required. G->getGlobalsGraph(); // Map all node reachable from this global to the corresponding nodes in // the globals graph. DSGraph::NodeMapTy GlobalsGraphNodeMapping; G->computeGToGGMapping(GlobalsGraphNodeMapping); // Loop over all of the nodes which are non-escaping, adding pool-allocatable // ones to the NodesToPA vector. for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end(); I != E; ++I) { DSNode *N = I; if (GlobalsGraphNodeMapping.count(N)) { // If it is a global pool, set up the pool descriptor appropriately. DSNode *GGN = GlobalsGraphNodeMapping[N].getNode(); assert(getFuncInfo(0)->PoolDescriptors[GGN] && "Should be in global mapping!"); FI.PoolDescriptors[N] = getFuncInfo(0)->PoolDescriptors[GGN]; } else if (!FI.PoolDescriptors[N]) { // Otherwise, if it was not passed in from outside the function, it must // be a local pool! assert(!N->isGlobalNode() && "Should be in global mapping!"); FI.PoolDescriptors[N] = CreateLocalPool(N, NewF); } } TransformBody(NewF, FI, DS); }
// // Method: visitCallSite() // // Description: // This method transforms a call site. A call site may either be a call // instruction or an invoke instruction. // // Inputs: // CS - The call site representing the instruction that should be transformed. // void FuncTransform::visitCallSite(CallSite& CS) { const Function *CF = CS.getCalledFunction(); Instruction *TheCall = CS.getInstruction(); bool thread_creation_point = false; // // Get the value that is called at this call site. Strip away any pointer // casts that do not change the representation of the data (i.e., are // lossless casts). // Value * CalledValue = CS.getCalledValue()->stripPointerCasts(); // // The CallSite::getCalledFunction() method is not guaranteed to strip off // pointer casts. If no called function was found, manually strip pointer // casts off of the called value and see if we get a function. If so, this // is a direct call, and we want to update CF accordingly. // if (!CF) CF = dyn_cast<Function>(CalledValue); // // Do not change any inline assembly code. // if (isa<InlineAsm>(TheCall->getOperand(0))) { errs() << "INLINE ASM: ignoring. Hoping that's safe.\n"; return; } // // Ignore calls to NULL pointers or undefined values. // if ((isa<ConstantPointerNull>(CalledValue)) || (isa<UndefValue>(CalledValue))) { errs() << "WARNING: Ignoring call using NULL/Undef function pointer.\n"; return; } // If this function is one of the memory manipulating functions built into // libc, emulate it with pool calls as appropriate. if (CF && CF->isDeclaration()) { std::string Name = CF->getName(); if (Name == "free" || Name == "cfree") { visitFreeCall(CS); return; } else if (Name == "malloc") { visitMallocCall(CS); return; } else if (Name == "calloc") { visitCallocCall(CS); return; } else if (Name == "realloc") { visitReallocCall(CS); return; } else if (Name == "memalign" || Name == "posix_memalign") { visitMemAlignCall(CS); return; } else if (Name == "strdup") { visitStrdupCall(CS); return; } else if (Name == "valloc") { errs() << "VALLOC USED BUT NOT HANDLED!\n"; abort(); } else if (unsigned PoolArgc = PAInfo.getNumInitialPoolArguments(Name)) { visitRuntimeCheck(CS, PoolArgc); return; } else if (Name == "pthread_create") { thread_creation_point = true; // // Get DSNode representing the DSNode of the function pointer Value of // the pthread_create call // DSNode* thread_callee_node = G->getNodeForValue(CS.getArgument(2)).getNode(); if (!thread_callee_node) { assert(0 && "apparently you need this code"); FuncInfo *CFI = PAInfo.getFuncInfo(*CF); thread_callee_node = G->getNodeForValue(CFI->MapValueToOriginal(CS.getArgument(2))).getNode(); } // Fill in CF with the name of one of the functions in thread_callee_node CF = const_cast<Function*>(dyn_cast<Function>(*thread_callee_node->globals_begin())); } } // // We need to figure out which local pool descriptors correspond to the pool // descriptor arguments passed into the function call. Calculate a mapping // from callee DSNodes to caller DSNodes. We construct a partial isomophism // between the graphs to figure out which pool descriptors need to be passed // in. The roots of this mapping is found from arguments and return values. // DataStructures& Graphs = PAInfo.getGraphs(); DSGraph::NodeMapTy NodeMapping; Instruction *NewCall; Value *NewCallee; std::vector<const DSNode*> ArgNodes; DSGraph *CalleeGraph; // The callee graph // For indirect callees, find any callee since all DS graphs have been // merged. if (CF) { // Direct calls are nice and simple. DEBUG(errs() << " Handling direct call: " << *TheCall << "\n"); // // Do not try to add pool handles to the function if it: // a) Already calls a cloned function; or // b) Calls a function which was never cloned. // // For such a call, just replace any arguments that take original functions // with their cloned function poiner values. // FuncInfo *CFI = PAInfo.getFuncInfo(*CF); if (CFI == 0 || CFI->Clone == 0) { // Nothing to transform... visitInstruction(*TheCall); return; } // // Oh, dear. We must add pool descriptors to this direct call. // NewCallee = CFI->Clone; ArgNodes = CFI->ArgNodes; assert ((Graphs.hasDSGraph (*CF)) && "Function has no ECGraph!\n"); CalleeGraph = Graphs.getDSGraph(*CF); } else { DEBUG(errs() << " Handling indirect call: " << *TheCall << "\n"); DSGraph *G = Graphs.getGlobalsGraph(); DSGraph::ScalarMapTy& SM = G->getScalarMap(); // Here we fill in CF with one of the possible called functions. Because we // merged together all of the arguments to all of the functions in the // equivalence set, it doesn't really matter which one we pick. // (If the function was cloned, we have to map the cloned call instruction // in CS back to the original call instruction.) Instruction *OrigInst = cast<Instruction>(getOldValueIfAvailable(CS.getInstruction())); // // Attempt to get one of the function targets of this indirect call site by // looking at the call graph constructed by the points-to analysis. Be // sure to use the original call site from the original function; the // points-to analysis has no information on the clones we've created. // // Also, look for the target that has the greatest number of arguments that // have associated DSNodes. This ensures that we pass the maximum number // of pools possible and prevents us from eliding a pool because we're // examining a target that doesn't need it. // const DSCallGraph & callGraph = Graphs.getCallGraph(); DSCallGraph::callee_iterator I = callGraph.callee_begin(OrigInst); for (; I != callGraph.callee_end(OrigInst); ++I) { for(DSCallGraph::scc_iterator sccii = callGraph.scc_begin(*I), sccee = callGraph.scc_end(*I); sccii != sccee; ++sccii){ if(SM.find(SM.getLeaderForGlobal(*sccii)) == SM.end()) continue; // // Get the information for this function. Since this is coming from // DSA, it should be an original function. // // This call site calls a function, that is not defined in this module if (!(Graphs.hasDSGraph(**sccii))) return; // For all other cases Func Info must exist. PAInfo.getFuncInfo(**sccii); // // If this target takes more DSNodes than the last one we found, then // make *this* target our canonical target. // CF = *sccii; break; } } if(!CF){ const Function *F1 = OrigInst->getParent()->getParent(); F1 = callGraph.sccLeader(&*F1); for(DSCallGraph::scc_iterator sccii = callGraph.scc_begin(F1), sccee = callGraph.scc_end(F1); sccii != sccee; ++sccii){ if(SM.find(SM.getLeaderForGlobal(*sccii)) == SM.end()) continue; // // Get the information for this function. Since this is coming from DSA, // it should be an original function. // // This call site calls a function, that is not defined in this module if (!(Graphs.hasDSGraph(**sccii))) return; // For all other cases Func Info must exist. PAInfo.getFuncInfo(**sccii); // // If this target takes more DSNodes than the last one we found, then // make *this* target our canonical target. // CF = *sccii; } } // Assuming the call graph is always correct. And if the call graph reports, // no callees, we can assume that it is right. // // If we didn't find the callee in the constructed call graph, try // checking in the DSNode itself. // This isn't ideal as it means that this call site didn't have inlining // happen. // // // If we still haven't been able to find a target function of the call site // to transform, do nothing. // // One may be tempted to think that we should always have at least one // target, but this is not true. There are perfectly acceptable (but // strange) programs for which no function targets exist. Function // pointers loaded from undef values, for example, will have no targets. // if (!CF) return; // // It's possible that this program has indirect call targets that are // not defined in this module. Do not transformation for such functions. // if (!(Graphs.hasDSGraph(*CF))) return; // // Get the common graph for the set of functions this call may invoke. // assert ((Graphs.hasDSGraph(*CF)) && "Function has no DSGraph!\n"); CalleeGraph = Graphs.getDSGraph(*CF); #ifndef NDEBUG // Verify that all potential callees at call site have the same DS graph. DSCallGraph::callee_iterator E = Graphs.getCallGraph().callee_end(OrigInst); for (; I != E; ++I) { const Function * F = *I; assert (F); if (!(F)->isDeclaration()) assert(CalleeGraph == Graphs.getDSGraph(**I) && "Callees at call site do not have a common graph!"); } #endif // Find the DS nodes for the arguments that need to be added, if any. FuncInfo *CFI = PAInfo.getFuncInfo(*CF); assert(CFI && "No function info for callee at indirect call?"); ArgNodes = CFI->ArgNodes; if (ArgNodes.empty()) return; // No arguments to add? Transformation is a noop! // Cast the function pointer to an appropriate type! std::vector<Type*> ArgTys(ArgNodes.size(), PoolAllocate::PoolDescPtrTy); for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) ArgTys.push_back((*I)->getType()); FunctionType *FTy = FunctionType::get(TheCall->getType(), ArgTys, false); PointerType *PFTy = PointerType::getUnqual(FTy); // If there are any pool arguments cast the func ptr to the right type. NewCallee = CastInst::CreatePointerCast(CS.getCalledValue(), PFTy, "tmp", TheCall); } // // FIXME: Why do we disable strict checking when calling the // DSGraph::computeNodeMapping() method? // Function::const_arg_iterator FAI = CF->arg_begin(), E = CF->arg_end(); CallSite::arg_iterator AI = CS.arg_begin() + (thread_creation_point ? 3 : 0); CallSite::arg_iterator AE = CS.arg_end(); for ( ; FAI != E && AI != AE; ++FAI, ++AI) if (!isa<Constant>(*AI)) { DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(FAI), getDSNodeHFor(*AI), NodeMapping, false); } //assert(AI == AE && "Varargs calls not handled yet!"); // Map the return value as well... if (isa<PointerType>(TheCall->getType())) DSGraph::computeNodeMapping(CalleeGraph->getReturnNodeFor(*CF), getDSNodeHFor(TheCall), NodeMapping, false); // This code seems redundant (and crashes occasionally) // There is no reason to map globals here, since they are not passed as // arguments // // Map the nodes that are pointed to by globals. // DSScalarMap &CalleeSM = CalleeGraph->getScalarMap(); // for (DSScalarMap::global_iterator GI = G.getScalarMap().global_begin(), // E = G.getScalarMap().global_end(); GI != E; ++GI) // if (CalleeSM.count(*GI)) // DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(*GI), // getDSNodeHFor(*GI), // NodeMapping, false); // // Okay, now that we have established our mapping, we can figure out which // pool descriptors to pass in... // // Note: // There used to be code here that would create a new pool before the // function call and destroy it after the function call. This could would // get triggered if bounds checking was disbled or the DSNode for the // argument was an array value. // // I believe that code was incorrect; an argument may have a NULL pool handle // (i.e., no pool handle) because the pool allocation heuristic used simply // decided not to assign that value a pool. The argument may alias data // that should not be freed after the function call is complete, so calling // pooldestroy() after the call would free data, causing dangling pointer // dereference errors. // std::vector<Value*> Args; for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i) { Value *ArgVal = Constant::getNullValue(PoolAllocate::PoolDescPtrTy); if (NodeMapping.count(ArgNodes[i])) { if (DSNode *LocalNode = NodeMapping[ArgNodes[i]].getNode()) if (FI.PoolDescriptors.count(LocalNode)) ArgVal = FI.PoolDescriptors.find(LocalNode)->second; } Args.push_back(ArgVal); } // Add the rest of the arguments unless we're a thread creation point, in which case we only need the pools if(!thread_creation_point) Args.insert(Args.end(), CS.arg_begin(), CS.arg_end()); // // There are circumstances where a function is casted to another type and // then called (que horible). We need to perform a similar cast if the // type doesn't match the number of arguments. // if (Function * NewFunction = dyn_cast<Function>(NewCallee)) { FunctionType * NewCalleeType = NewFunction->getFunctionType(); if (NewCalleeType->getNumParams() != Args.size()) { std::vector<Type *> Types; Type * FuncTy = FunctionType::get (NewCalleeType->getReturnType(), Types, true); FuncTy = PointerType::getUnqual (FuncTy); NewCallee = new BitCastInst (NewCallee, FuncTy, "", TheCall); } } std::string Name = TheCall->getName(); TheCall->setName(""); if(thread_creation_point) { Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); Value* pthread_replacement = M->getFunction("poolalloc_pthread_create"); std::vector<Value*> thread_args; //Push back original thread arguments through the callee thread_args.push_back(CS.getArgument(0)); thread_args.push_back(CS.getArgument(1)); thread_args.push_back(CS.getArgument(2)); //Push back the integer argument saying how many uses there are thread_args.push_back(Constant::getIntegerValue(llvm::Type::getInt32Ty(M->getContext()),APInt(32,Args.size()))); thread_args.insert(thread_args.end(),Args.begin(),Args.end()); thread_args.push_back(CS.getArgument(3)); //Make the thread creation call NewCall = CallInst::Create(pthread_replacement, thread_args, Name,TheCall); } else if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { NewCall = InvokeInst::Create (NewCallee, II->getNormalDest(), II->getUnwindDest(), Args, Name, TheCall); } else { NewCall = CallInst::Create (NewCallee, Args, Name, TheCall); } // Add all of the uses of the pool descriptor for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i) AddPoolUse(*NewCall, Args[i], PoolUses); TheCall->replaceAllUsesWith(NewCall); DEBUG(errs() << " Result Call: " << *NewCall << "\n"); if (!TheCall->getType()->isVoidTy()) { // If we are modifying the original function, update the DSGraph... DSGraph::ScalarMapTy &SM = G->getScalarMap(); DSGraph::ScalarMapTy::iterator CII = SM.find(TheCall); if (CII != SM.end()) { SM[NewCall] = CII->second; SM.erase(CII); // Destroy the CallInst } else if (!FI.NewToOldValueMap.empty()) { // Otherwise, if this is a clone, update the NewToOldValueMap with the new // CI return value. UpdateNewToOldValueMap(TheCall, NewCall); } } else if (!FI.NewToOldValueMap.empty()) { UpdateNewToOldValueMap(TheCall, NewCall); } // // Copy over the calling convention and attributes of the original call // instruction to the new call instruction. // CallSite(NewCall).setCallingConv(CallSite(TheCall).getCallingConv()); TheCall->eraseFromParent(); visitInstruction(*NewCall); }
void RTAssociate::replaceCall(CallSite CS, FuncInfo& FI, DataStructures* DS) { const Function *CF = CS.getCalledFunction(); Instruction *TheCall = CS.getInstruction(); // If the called function is casted from one function type to another, peer // into the cast instruction and pull out the actual function being called. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CS.getCalledValue())) if (CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0))) CF = cast<Function>(CE->getOperand(0)); if (isa<InlineAsm>(TheCall->getOperand(0))) { errs() << "INLINE ASM: ignoring. Hoping that's safe.\n"; return; } // Ignore calls to NULL pointers. if (isa<ConstantPointerNull>(CS.getCalledValue())) { errs() << "WARNING: Ignoring call using NULL function pointer.\n"; return; } // We need to figure out which local pool descriptors correspond to the pool // descriptor arguments passed into the function call. Calculate a mapping // from callee DSNodes to caller DSNodes. We construct a partial isomophism // between the graphs to figure out which pool descriptors need to be passed // in. The roots of this mapping is found from arguments and return values. // DSGraph::NodeMapTy NodeMapping; Instruction *NewCall; Value *NewCallee; std::vector<const DSNode*> ArgNodes; DSGraph *CalleeGraph; // The callee graph // For indirect callees, find any callee since all DS graphs have been // merged. if (CF) { // Direct calls are nice and simple. DEBUG(errs() << " Handling direct call: " << *TheCall); FuncInfo *CFI = getFuncInfo(CF); if (CFI == 0 || CFI->Clone == 0) // Nothing to transform... return; NewCallee = CFI->Clone; ArgNodes = CFI->ArgNodes; assert ((DS->hasDSGraph (*CF)) && "Function has no ECGraph!\n"); CalleeGraph = DS->getDSGraph(*CF); } else { DEBUG(errs() << " Handling indirect call: " << *TheCall); // Here we fill in CF with one of the possible called functions. Because we // merged together all of the arguments to all of the functions in the // equivalence set, it doesn't really matter which one we pick. // (If the function was cloned, we have to map the cloned call instruction // in CS back to the original call instruction.) Instruction *OrigInst = cast<Instruction>(FI.getOldValueIfAvailable(CS.getInstruction())); DSCallGraph::callee_iterator I = DS->getCallGraph().callee_begin(CS); if (I != DS->getCallGraph().callee_end(CS)) CF = *I; // If we didn't find the callee in the constructed call graph, try // checking in the DSNode itself. // This isn't ideal as it means that this call site didn't have inlining // happen. if (!CF) { DSGraph* dg = DS->getDSGraph(*OrigInst->getParent()->getParent()); DSNode* d = dg->getNodeForValue(OrigInst->getOperand(0)).getNode(); assert (d && "No DSNode!\n"); std::vector<const Function*> g; d->addFullFunctionList(g); if (g.size()) { EquivalenceClasses< const GlobalValue *> & EC = dg->getGlobalECs(); for(std::vector<const Function*>::const_iterator ii = g.begin(), ee = g.end(); !CF && ii != ee; ++ii) { for (EquivalenceClasses<const GlobalValue *>::member_iterator MI = EC.findLeader(*ii); MI != EC.member_end(); ++MI) // Loop over members in this set. if ((CF = dyn_cast<Function>(*MI))) { break; } } } } // // Do an assert unless we're bugpointing something. // // if ((UsingBugpoint) && (!CF)) return; if (!CF) errs() << "No Graph for CallSite in " << TheCall->getParent()->getParent()->getName().str() << " originally " << OrigInst->getParent()->getParent()->getName().str() << "\n"; assert (CF && "No call graph info"); // Get the common graph for the set of functions this call may invoke. // if (UsingBugpoint && (!(Graphs.hasDSGraph(*CF)))) return; assert ((DS->hasDSGraph(*CF)) && "Function has no DSGraph!\n"); CalleeGraph = DS->getDSGraph(*CF); #ifndef NDEBUG // Verify that all potential callees at call site have the same DS graph. DSCallGraph::callee_iterator E = DS->getCallGraph().callee_end(CS); for (; I != E; ++I) if (!(*I)->isDeclaration()) assert(CalleeGraph == DS->getDSGraph(**I) && "Callees at call site do not have a common graph!"); #endif // Find the DS nodes for the arguments that need to be added, if any. FuncInfo *CFI = getFuncInfo(CF); assert(CFI && "No function info for callee at indirect call?"); ArgNodes = CFI->ArgNodes; if (ArgNodes.empty()) return; // No arguments to add? Transformation is a noop! // Cast the function pointer to an appropriate type! std::vector<Type*> ArgTys(ArgNodes.size(), PoolDescPtrTy); for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) ArgTys.push_back((*I)->getType()); FunctionType *FTy = FunctionType::get(TheCall->getType(), ArgTys, false); PointerType *PFTy = PointerType::getUnqual(FTy); // If there are any pool arguments cast the func ptr to the right type. NewCallee = CastInst::CreatePointerCast(CS.getCalledValue(), PFTy, "tmp", TheCall); } Function::const_arg_iterator FAI = CF->arg_begin(), E = CF->arg_end(); CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); for ( ; FAI != E && AI != AE; ++FAI, ++AI) if (!isa<Constant>(*AI)) DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(FAI), FI.getDSNodeHFor(*AI), NodeMapping, false); assert(AI == AE && "Varargs calls not handled yet!"); // Map the return value as well... if (isa<PointerType>(TheCall->getType())) DSGraph::computeNodeMapping(CalleeGraph->getReturnNodeFor(*CF), FI.getDSNodeHFor(TheCall), NodeMapping, false); // Okay, now that we have established our mapping, we can figure out which // pool descriptors to pass in... std::vector<Value*> Args; for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i) { Value *ArgVal = Constant::getNullValue(PoolDescPtrTy); if (NodeMapping.count(ArgNodes[i])) if (DSNode *LocalNode = NodeMapping[ArgNodes[i]].getNode()) if (FI.PoolDescriptors.count(LocalNode)) ArgVal = FI.PoolDescriptors.find(LocalNode)->second; if (isa<Constant > (ArgVal) && cast<Constant > (ArgVal)->isNullValue()) errs() << "WARNING: NULL POOL ARGUMENTS ARE PASSED IN!\n"; Args.push_back(ArgVal); } // Add the rest of the arguments... Args.insert(Args.end(), CS.arg_begin(), CS.arg_end()); // // There are circumstances where a function is casted to another type and // then called (que horible). We need to perform a similar cast if the // type doesn't match the number of arguments. // if (Function * NewFunction = dyn_cast<Function>(NewCallee)) { FunctionType * NewCalleeType = NewFunction->getFunctionType(); if (NewCalleeType->getNumParams() != Args.size()) { std::vector<Type *> Types; Type * FuncTy = FunctionType::get (NewCalleeType->getReturnType(), Types, true); FuncTy = PointerType::getUnqual (FuncTy); NewCallee = new BitCastInst (NewCallee, FuncTy, "", TheCall); } } std::string Name = TheCall->getName(); TheCall->setName(""); if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { NewCall = InvokeInst::Create (NewCallee, II->getNormalDest(), II->getUnwindDest(), Args, Name, TheCall); } else { NewCall = CallInst::Create (NewCallee, Args, Name, TheCall); } TheCall->replaceAllUsesWith(NewCall); DEBUG(errs() << " Result Call: " << *NewCall); if (TheCall->getType()->getTypeID() != Type::VoidTyID) { // If we are modifying the original function, update the DSGraph... DSGraph::ScalarMapTy &SM = FI.G->getScalarMap(); DSGraph::ScalarMapTy::iterator CII = SM.find(TheCall); if (CII != SM.end()) { SM[NewCall] = CII->second; SM.erase(CII); // Destroy the CallInst } else if (!FI.NewToOldValueMap.empty()) { // Otherwise, if this is a clone, update the NewToOldValueMap with the new // CI return value. FI.UpdateNewToOldValueMap(TheCall, NewCall); } } else if (!FI.NewToOldValueMap.empty()) { FI.UpdateNewToOldValueMap(TheCall, NewCall); } //FIXME: attributes on call? CallSite(NewCall).setCallingConv(CallSite(TheCall).getCallingConv()); TheCall->eraseFromParent(); }
/// visitGraph - Visit the functions in the specified graph, updating the /// specified lattice values for all of their uses. /// void StructureFieldVisitorBase:: visitGraph(DSGraph &DSG, std::multimap<DSNode*, LatticeValue*> &NodeLVs) { assert(!NodeLVs.empty() && "No lattice values to compute!"); // To visit a graph, first step, we visit the instruction making up each // function in the graph, but ignore calls when processing them. We handle // call nodes explicitly by looking at call nodes in the graph if needed. We // handle instructions before calls to avoid interprocedural analysis if we // can drive lattice values to bottom early. // SFVInstVisitor IV(DSG, Callbacks, NodeLVs); for (DSGraph::retnodes_iterator FI = DSG.retnodes_begin(), E = DSG.retnodes_end(); FI != E; ++FI) for (Function::iterator BB = FI->first->begin(), E = FI->first->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (IV.visit(*I) && NodeLVs.empty()) return; // Nothing left to analyze. // Keep track of which actual direct callees are handled. std::set<Function*> CalleesHandled; // Once we have visited all of the instructions in the function bodies, if // there are lattice values that have not been driven to bottom, see if any of // the nodes involved are passed into function calls. If so, we potentially // have to recursively traverse the call graph. for (DSGraph::fc_iterator CS = DSG.fc_begin(), E = DSG.fc_end(); CS != E; ++CS) { // Figure out the mapping from a node in the caller (potentially several) // nodes in the callee. DSGraph::NodeMapTy CallNodeMap; Instruction *TheCall = CS->getCallSite().getInstruction(); // If this is an indirect function call, assume nothing gets passed through // it. FIXME: THIS IS BROKEN! Just get the ECG for the fn ptr if it's not // direct. if (CS->isIndirectCall()) continue; // If this is an external function call, it cannot be involved with this // node, because otherwise the node would be marked incomplete! if (CS->getCalleeFunc()->isExternal()) continue; // If we can handle this function call, remove it from the set of direct // calls found by the visitor. CalleesHandled.insert(CS->getCalleeFunc()); std::vector<DSNodeHandle> Args; DSGraph *CG = &ECG.getDSGraph(*CS->getCalleeFunc()); CG->getFunctionArgumentsForCall(CS->getCalleeFunc(), Args); if (!CS->getRetVal().isNull()) DSGraph::computeNodeMapping(Args[0], CS->getRetVal(), CallNodeMap); for (unsigned i = 0, e = CS->getNumPtrArgs(); i != e; ++i) { if (i == Args.size()-1) break; DSGraph::computeNodeMapping(Args[i+1], CS->getPtrArg(i), CallNodeMap); } Args.clear(); // The mapping we just computed maps from nodes in the callee to nodes in // the caller, so we can't query it efficiently. Instead of going through // the trouble of inverting the map to do this (linear time with the size of // the mapping), we just do a linear search to see if any affected nodes are // passed into this call. bool CallCanModifyDataFlow = false; for (DSGraph::NodeMapTy::iterator MI = CallNodeMap.begin(), E = CallNodeMap.end(); MI != E; ++MI) if (NodeLVs.count(MI->second.getNode())) // Okay, the node is passed in, check to see if the call might do // something interesting to it (i.e. if analyzing the call can produce // anything other than "top"). if ((CallCanModifyDataFlow = NodeCanPossiblyBeInteresting(MI->first, Callbacks))) break; // If this function call cannot impact the analysis (either because the // nodes we are tracking are not passed into the call, or the DSGraph for // the callee tells us that analysis of the callee can't provide interesting // information), ignore it. if (!CallCanModifyDataFlow) continue; // Okay, either compute analysis results for the callee function, or reuse // results previously computed. std::multimap<DSNode*, LatticeValue*> &CalleeFacts = getCalleeFacts(*CG); // Merge all of the facts for the callee into the facts for the caller. If // this reduces anything in the caller to 'bottom', remove them. for (DSGraph::NodeMapTy::iterator MI = CallNodeMap.begin(), E = CallNodeMap.end(); MI != E; ++MI) { // If we have Lattice facts in the caller for this node in the callee, // merge any information from the callee into the caller. // If the node is not accessed in the callee at all, don't update. if (MI->first->getType() == Type::VoidTy) continue; // If there are no data-flow facts live in the caller for this node, don't // both processing it. std::multimap<DSNode*, LatticeValue*>::iterator NLVI = NodeLVs.find(MI->second.getNode()); if (NLVI == NodeLVs.end()) continue; // Iterate over all of the lattice values that have corresponding fields // in the callee, merging in information as we go. Be careful about the // fact that the callee may get passed the address of a substructure and // other funny games. //if (CalleeFacts.count(const_cast<DSNode*>(MI->first)) == 0) { DSNode *CalleeNode = const_cast<DSNode*>(MI->first); unsigned CalleeNodeOffset = MI->second.getOffset(); while (NLVI->first == MI->second.getNode()) { // Figure out what offset in the callee this field would land. unsigned FieldOff = NLVI->second->getFieldOffset()+CalleeNodeOffset; // If the field is not within the callee node, ignore it. if (FieldOff >= CalleeNode->getSize()) { ++NLVI; continue; } // Okay, check to see if we have a lattice value for the field at offset // FieldOff in the callee node. const LatticeValue *CalleeLV = 0; std::multimap<DSNode*, LatticeValue*>::iterator CFI = CalleeFacts.lower_bound(CalleeNode); for (; CFI != CalleeFacts.end() && CFI->first == CalleeNode; ++CFI) if (CFI->second->getFieldOffset() == FieldOff) { CalleeLV = CFI->second; // Found it! break; } // If we don't, the lattice value hit bottom and we should remove the // lattice value in the caller. if (!CalleeLV) { delete NLVI->second; // The lattice value hit bottom. NodeLVs.erase(NLVI++); continue; } // Finally, if we did find a corresponding entry, merge the information // into the caller's lattice value and keep going. if (NLVI->second->mergeInValue(CalleeLV)) { // Okay, merging these two caused the caller value to hit bottom. // Remove it. delete NLVI->second; // The lattice value hit bottom. NodeLVs.erase(NLVI++); } ++NLVI; // We successfully merged in some information! } // If we ran out of facts to prove, just exit. if (NodeLVs.empty()) return; } } // The local analysis pass inconveniently discards many local function calls // from the graph if they are to known functions. Loop over direct function // calls not handled above and visit them as appropriate. while (!IV.DirectCallSites.empty()) { Instruction *Call = *IV.DirectCallSites.begin(); IV.DirectCallSites.erase(IV.DirectCallSites.begin()); // Is this one actually handled by DSA? if (CalleesHandled.count(cast<Function>(Call->getOperand(0)))) continue; // Collect the pointers involved in this call. std::vector<Value*> Pointers; if (isa<PointerType>(Call->getType())) Pointers.push_back(Call); for (unsigned i = 1, e = Call->getNumOperands(); i != e; ++i) if (isa<PointerType>(Call->getOperand(i)->getType())) Pointers.push_back(Call->getOperand(i)); // If this is an intrinsic function call, figure out which one. unsigned IID = cast<Function>(Call->getOperand(0))->getIntrinsicID(); for (unsigned i = 0, e = Pointers.size(); i != e; ++i) { // If any of our lattice values are passed into this call, which is // specially handled by the local analyzer, inform the lattice function. DSNode *N = DSG.getNodeForValue(Pointers[i]).getNode(); for (std::multimap<DSNode*, LatticeValue*>::iterator LVI = NodeLVs.lower_bound(N); LVI != NodeLVs.end() && LVI->first == N;) { bool AtBottom = false; switch (IID) { default: AtBottom = LVI->second->visitRecognizedCall(*Call); break; case Intrinsic::memset: if (Callbacks & Visit::Stores) AtBottom = LVI->second->visitMemSet(*cast<CallInst>(Call)); break; } if (AtBottom) { delete LVI->second; NodeLVs.erase(LVI++); } else { ++LVI; } } } } }
/// ProcessNodesReachableFromGlobals - If we inferred anything about nodes /// reachable from globals, we have to make sure that we incorporate data for /// all graphs that include those globals due to the nature of the globals /// graph. /// void StructureFieldVisitorBase:: ProcessNodesReachableFromGlobals(DSGraph &DSG, std::multimap<DSNode*,LatticeValue*> &NodeLVs){ // Start by marking all nodes reachable from globals. DSScalarMap &SM = DSG.getScalarMap(); if (SM.global_begin() == SM.global_end()) return; hash_set<const DSNode*> Reachable; for (DSScalarMap::global_iterator GI = SM.global_begin(), E = SM.global_end(); GI != E; ++GI) SM[*GI].getNode()->markReachableNodes(Reachable); if (Reachable.empty()) return; // If any of the nodes with dataflow facts are reachable from the globals // graph, we have to do the GG processing step. bool MustProcessThroughGlobalsGraph = false; for (std::multimap<DSNode*, LatticeValue*>::iterator I = NodeLVs.begin(), E = NodeLVs.end(); I != E; ++I) if (Reachable.count(I->first)) { MustProcessThroughGlobalsGraph = true; break; } if (!MustProcessThroughGlobalsGraph) return; Reachable.clear(); // Compute the mapping from DSG to the globals graph. DSGraph::NodeMapTy DSGToGGMap; DSG.computeGToGGMapping(DSGToGGMap); // Most of the times when we find facts about things reachable from globals we // we are in the main graph. This means that we have *all* of the globals // graph in this DSG. To be efficient, we compute the minimum set of globals // that can reach any of the NodeLVs facts. // // I'm not aware of any wonderful way of computing the set of globals that // points to the set of nodes in NodeLVs that is not N^2 in either NodeLVs or // the number of globals, except to compute the inverse of DSG. As such, we // compute the inverse graph of DSG, which basically has the edges going from // pointed to nodes to pointing nodes. Because we only care about one // connectedness properties, we ignore field info. In addition, we only // compute inverse of the portion of the graph reachable from the globals. std::set<std::pair<DSNode*,DSNode*> > InverseGraph; for (DSScalarMap::global_iterator GI = SM.global_begin(), E = SM.global_end(); GI != E; ++GI) ComputeInverseGraphFrom(SM[*GI].getNode(), InverseGraph); // Okay, now that we have our bastardized inverse graph, compute the set of // globals nodes reachable from our lattice nodes. for (std::multimap<DSNode*, LatticeValue*>::iterator I = NodeLVs.begin(), E = NodeLVs.end(); I != E; ++I) ComputeNodesReachableFrom(I->first, InverseGraph, Reachable); // Now that we know which nodes point to the data flow facts, figure out which // globals point to the data flow facts. std::set<GlobalValue*> Globals; for (hash_set<const DSNode*>::iterator I = Reachable.begin(), E = Reachable.end(); I != E; ++I) Globals.insert((*I)->globals_begin(), (*I)->globals_end()); // Finally, loop over all of the DSGraphs for the program, computing // information for the graph if not done already, mapping the result into our // context. for (hash_map<const Function*, DSGraph*>::iterator GI = ECG.DSInfo.begin(), E = ECG.DSInfo.end(); GI != E; ++GI) { DSGraph &FG = *GI->second; // Graphs can contain multiple functions, only process the graph once. if (GI->first != FG.retnodes_begin()->first || // Also, do not bother reprocessing DSG. &FG == &DSG) continue; bool GraphUsesGlobal = false; for (std::set<GlobalValue*>::iterator I = Globals.begin(), E = Globals.end(); I != E; ++I) if (FG.getScalarMap().count(*I)) { GraphUsesGlobal = true; break; } // If this graph does not contain the global at all, there is no reason to // even think about it. if (!GraphUsesGlobal) continue; // Otherwise, compute the full set of dataflow effects of the function. std::multimap<DSNode*, LatticeValue*> &FGF = getCalleeFacts(FG); //std::cerr << "Computed: " << FG.getFunctionNames() << "\n"; #if 0 for (std::multimap<DSNode*, LatticeValue*>::iterator I = FGF.begin(), E = FGF.end(); I != E; ++I) I->second->dump(); #endif // Compute the mapping of nodes in the globals graph to the function's // graph. Note that this function graph may not have nodes (or may have // fragments of full nodes) in the globals graph, and we don't want this to // pessimize the analysis. std::multimap<const DSNode*, std::pair<DSNode*,int> > GraphMap; DSGraph::NodeMapTy GraphToGGMap; FG.computeGToGGMapping(GraphToGGMap); // "Invert" the mapping. We compute the mapping from the start of a global // graph node to a place in the graph's node. Note that not all of the GG // node may be present in the graphs node, so there may be a negative offset // involved. while (!GraphToGGMap.empty()) { DSNode *GN = const_cast<DSNode*>(GraphToGGMap.begin()->first); DSNodeHandle &GGNH = GraphToGGMap.begin()->second; GraphMap.insert(std::make_pair(GGNH.getNode(), std::make_pair(GN, -GGNH.getOffset()))); GraphToGGMap.erase(GraphToGGMap.begin()); } // Loop over all of the dataflow facts that we have computed, mapping them // to the globals graph. for (std::multimap<DSNode*, LatticeValue*>::iterator I = NodeLVs.begin(), E = NodeLVs.end(); I != E; ) { bool FactHitBottom = false; //I->second->dump(); assert(I->first->getParentGraph() == &DSG); assert(I->second->getNode()->getParentGraph() == &DSG); // Node is in the GG? DSGraph::NodeMapTy::iterator DSGToGGMapI = DSGToGGMap.find(I->first); if (DSGToGGMapI != DSGToGGMap.end()) { DSNodeHandle &GGNH = DSGToGGMapI->second; const DSNode *GGNode = GGNH.getNode(); unsigned DSGToGGOffset = GGNH.getOffset(); // See if there is a node in FG that corresponds to this one. If not, // no information will be computed in this scope, as the memory is not // accessed. std::multimap<const DSNode*, std::pair<DSNode*,int> >::iterator GMI = GraphMap.find(GGNode); // LatticeValOffset - The offset from the start of the GG Node to the // start of the field we are interested in. unsigned LatticeValOffset = I->second->getFieldOffset()+DSGToGGOffset; // Loop over all of the nodes in FG that correspond to this single node // in the GG. for (; GMI != GraphMap.end() && GMI->first == GGNode; ++GMI) { // Compute the offset to the field in the user graph. unsigned FieldOffset = LatticeValOffset - GMI->second.second; // If the field is within the amount of memory accessed by this scope, // then there must be a corresponding lattice value. DSNode *FGNode = GMI->second.first; if (FieldOffset < FGNode->getSize()) { LatticeValue *CorrespondingLV = 0; std::multimap<DSNode*, LatticeValue*>::iterator FGFI = FGF.find(FGNode); for (; FGFI != FGF.end() && FGFI->first == FGNode; ++FGFI) if (FGFI->second->getFieldOffset() == FieldOffset) { CorrespondingLV = FGFI->second; break; } // Finally, if either there was no corresponding fact (because it // hit bottom in this scope), or if merging the two pieces of // information makes it hit bottom, remember this. if (CorrespondingLV == 0 || I->second->mergeInValue(CorrespondingLV)) FactHitBottom = true; } } } if (FactHitBottom) { delete I->second; NodeLVs.erase(I++); if (NodeLVs.empty()) return; } else { ++I; } } } }