/// Build the node sequence that models a call to realloc().
///
/// The sequence is: DYN_ALLOC (the new memory) -> MEMCPY (old memory into
/// the new one) -> CONSTANT (pointer to the new memory at offset 0).
///
/// @param CInst  the realloc call; operand 0 is the reallocated pointer,
///               operand 1 is the requested size
/// @return the (first, last) nodes of the created sequence; the sequence
///         is also registered for CInst via addNode()
PSNodesSeq LLVMPointerSubgraphBuilder::createRealloc(const llvm::CallInst *CInst)
{
    using namespace llvm;

    // the memory that is being reallocated
    PSNode *oldMemory = getOperand(CInst->getOperand(0));

    // the new allocation that replaces it
    PSNodeAlloc *newMemory = PSNodeAlloc::get(PS.create(PSNodeType::DYN_ALLOC));

    // transfer whatever the old memory holds into the new allocation
    // NOTE(review): processMemcpy() reads both node->offset and node->len,
    // but only one Offset is passed here -- confirm how PS.create() fills
    // in the remaining one
    PSNode *copyNode = PS.create(PSNodeType::MEMCPY, oldMemory, newMemory,
                                 Offset::UNKNOWN);

    // the sequence must end in a node that carries the resulting pointer
    PSNode *resultPtr = PS.create(PSNodeType::CONSTANT, newMemory, 0);

    // describe the new allocation
    newMemory->setIsHeap();
    newMemory->setSize(getConstantSizeValue(CInst->getOperand(1)));

    // wire the sequence: allocation -> memcpy -> pointer
    newMemory->addSuccessor(copyNode);
    copyNode->addSuccessor(resultPtr);

    newMemory->setUserData(const_cast<CallInst *>(CInst));

    PSNodesSeq sequence = PSNodesSeq(newMemory, resultPtr);
    addNode(CInst, sequence);

    return sequence;
}
/// Create the nodes that model the initializer of a global variable.
///
/// - a null (all-zero) initializer only marks the node as zero initialized
/// - for an aggregate initializer, a STORE is created for every operand
///   that is directly of pointer type; the target offset is the sum of the
///   alloc sizes of the preceding operands. Operands that are themselves
///   aggregates are skipped (they are not descended into).
/// - a pointer-typed scalar initializer is stored directly into the node
/// - integer and floating-point constants carry no pointers and are ignored
/// - anything else is reported and the process is aborted
///
/// @param C     the initializer
/// @param node  the node representing the global variable
/// @return the last node inserted after `node`, or `node` itself when
///         nothing was inserted
PSNode *
LLVMPointerSubgraphBuilder::handleGlobalVariableInitializer(const llvm::Constant *C,
                                                            PSNode *node)
{
    using namespace llvm;
    PSNode *last = node;

    // if the global is zero initialized, just set the zeroInitialized flag
    if (C->isNullValue()) {
        node->setZeroInitialized();
    } else if (C->getType()->isAggregateType()) {
        uint64_t off = 0;
        for (auto I = C->op_begin(), E = C->op_end(); I != E; ++I) {
            const Value *val = *I;
            Type *Ty = val->getType();

            if (Ty->isPointerTy()) {
                PSNode *op = getOperand(val);
                PSNode *target = new PSNode(CONSTANT, node, off);
                // FIXME: we're leaking the target
                // NOTE: maybe we could do something like
                // CONSTANT_STORE that would take Pointer instead of node??
                // PSNode(CONSTANT_STORE, op, Pointer(node, off)) or
                // PSNode(COPY, op, Pointer(node, off))??
                PSNode *store = new PSNode(STORE, op, target);
                store->insertAfter(last);
                last = store;
            }

            // advance to the next element even when this one was skipped
            off += DL->getTypeAllocSize(Ty);
        }
    } else if (isa<ConstantExpr>(C) || isa<Function>(C)
               || C->getType()->isPointerTy()) {
        // only pointer-typed values are stored; a non-pointer constant
        // expression is silently ignored
        if (C->getType()->isPointerTy()) {
            PSNode *value = getOperand(C);
            assert(value->pointsTo.size() == 1 && "BUG: We should have constant");
            // FIXME: we're leaking the target
            PSNode *store = new PSNode(STORE, value, node);
            store->insertAfter(last);
            last = store;
        }
    } else if (!isa<ConstantInt>(C) && !isa<ConstantFP>(C)) {
        // integer and floating-point constants cannot hold a pointer, so
        // there is nothing to model for them; a non-zero floating-point
        // initializer must not abort the analysis
        llvm::errs() << *C << "\n";
        llvm::errs() << "ERROR: ^^^ global variable initializer not handled\n";
        abort();
    }

    return last;
}
/// Build the nodes for all global variables of the module.
///
/// Pass 1 creates one ALLOC node per global and chains the nodes in module
/// order. Pass 2 models the initializers; this is a separate pass because
/// initializers may refer to any other global, so all global nodes must
/// exist first.
///
/// A global whose initializer is not available (a declaration, or an
/// externally initialized variable) gets a STORE of UNKNOWN_MEMORY, because
/// nothing can be assumed about what it holds.
///
/// @return the (first, last) nodes of the chain, or (nullptr, nullptr)
///         when the module has no global variables
PSNodesSeq LLVMPointerSubgraphBuilder::buildGlobals()
{
    PSNode *first = nullptr;
    PSNode *last = nullptr;

    // pass 1: create the nodes -- every global is like a memory allocation
    for (auto I = M->global_begin(), E = M->global_end(); I != E; ++I) {
        PSNodeAlloc *nd = PSNodeAlloc::get(PS.create(PSNodeType::ALLOC));
        nd->setIsGlobal();

        PSNode *cur = nd;
        addNode(&*I, cur);

        if (last)
            last->addSuccessor(cur);
        else
            first = cur;

        last = cur;
    }

    // Nodes created in pass 2 are inserted right after the node of their
    // global, so the end of the whole chain moves only when something is
    // inserted after the last global. Updating the end after every global
    // would leave it pointing into the middle of the chain whenever a
    // later global inserts nothing.
    PSNode *lastGlobal = last;

    // pass 2: only now handle the initializers - we need to have the nodes
    // built, because the globals can point to each other
    for (auto I = M->global_begin(), E = M->global_end(); I != E; ++I) {
        PSNodeAlloc *node = PSNodeAlloc::get(getNode(&*I));
        assert(node && "BUG: Do not have global variable"
                       " or it is not an allocation");

        // the module's global list contains only GlobalVariables, so there
        // is no need for a checked cast here
        const llvm::GlobalVariable *GV = &*I;
        node->setSize(getAllocatedSize(GV, DL));

        PSNode *tail;
        if (GV->hasInitializer() && !GV->isExternallyInitialized()) {
            tail = handleGlobalVariableInitializer(GV->getInitializer(), node);
        } else {
            // without a usable initializer we can not do anything else
            // than assume that the global can point everywhere
            tail = PS.create(PSNodeType::STORE, UNKNOWN_MEMORY, node);
            tail->insertAfter(node);
        }

        if (node == lastGlobal)
            last = tail;
    }

    assert((!first && !last) || (first && last));
    return std::make_pair(first, last);
}
/// Create the node for a call whose result is modelled as an unknown
/// pointer.
///
/// A single CALL node is created, paired with itself, and its points-to
/// set is seeded with PointerUnknown.
///
/// @param CInst  the call instruction; the node is registered for it
/// @return a sequence whose first and last node are both the created node
PSNodesSeq LLVMPointerSubgraphBuilder::createUnknownCall(const llvm::CallInst *CInst)
{
    // Do not assert that the call has a pointer type here: when the call
    // is wrapped in a bitcast it may be typed as an integer and only
    // bitcast to a pointer afterwards.
    PSNode *unknownCall = PS.create(PSNodeType::CALL, nullptr);

    // the call node serves as its own paired node
    unknownCall->setPairedNode(unknownCall);

    // this is the only thing the node will ever point at
    unknownCall->addPointsTo(PointerUnknown);

    addNode(CInst, unknownCall);

    return PSNodesSeq(unknownCall, unknownCall);
}
/// Build the node sequence that models the va_start intrinsic.
///
/// All pointers passed through the variadic arguments are stored into a
/// dedicated ALLOC node, and a pointer to that node is stored into the
/// memory given to va_start at Offset::UNKNOWN. This is the simplest
/// sound model that needs no further analysis.
///
/// Created sequence: ALLOC -> GEP -> STORE -> STORE
///
/// @param Inst  the va_start intrinsic call
/// @return the (first, last) nodes of the sequence; only the ALLOC node is
///         registered for Inst via addNode()
PSNodesSeq LLVMPointerSubgraphBuilder::createVarArg(const llvm::IntrinsicInst *Inst)
{
    // look up the node gathering the variadic arguments of the function
    // that contains this va_start
    const llvm::Function *F = Inst->getParent()->getParent();
    PSNode *arg = subgraphs_map[F].vararg;

    assert(F->isVarArg() && "vastart in a non-variadic function");
    assert(arg && "Don't have variadic argument in a variadic function");

    // memory that will hold the pointers passed as variadic arguments
    PSNode *vaMemory = PS.create(PSNodeType::ALLOC);

    // va_start has a single operand: the structure used for keeping the
    // va arguments. Strip the in-bounds offsets to reach the underlying
    // instruction.
    PSNode *vaList = getOperand(Inst->getOperand(0)->stripInBoundsOffsets());

    // it is usually an alloca, but it may be a load when the code was
    // transformed by -reg2mem
    assert((vaList->getType() == PSNodeType::ALLOC
            || vaList->getType() == PSNodeType::LOAD)
           && "Argument of vastart is invalid");

    // the same pointer as vaList, but with Offset::UNKNOWN
    PSNode *anyOffset = PS.create(PSNodeType::GEP, vaList, Offset::UNKNOWN);

    // make the memory of the va_list point to our memory ...
    PSNode *storeMemory = PS.create(PSNodeType::STORE, vaMemory, anyOffset);

    // ... and make our memory point to the variadic arguments
    PSNode *storeArgs = PS.create(PSNodeType::STORE, arg, vaMemory);

    vaMemory->addSuccessor(anyOffset);
    anyOffset->addSuccessor(storeMemory);
    storeMemory->addSuccessor(storeArgs);

    // Pair the first node with the last one, so that the whole sequence
    // is added when the structure is built (it is added from the
    // call-node to the pair-node, a remnant of the old system that did
    // not store whole sequences).
    // FIXME: fix this
    vaMemory->setPairedNode(storeArgs);

    // FIXME: nodes_map assumes that the last node of a sequence is the
    // 'real' node. That does not hold here, so register only the first
    // node to have at least the mapping in nodes_map.
    addNode(Inst, vaMemory);

    return PSNodesSeq(vaMemory, storeArgs);
}
/// Create the node for an inline-assembly call.
///
/// Irrelevant calls are filtered out in isRelevantCall() and assembler is
/// not expected there at all, so getting here means this is an undefined
/// call that returns a pointer. It is modelled by a constant pointer to
/// UNKNOWN_MEMORY at Offset::UNKNOWN.
///
/// A warning is printed to llvm::errs() the first time this is called.
///
/// @param Inst  the instruction; the created node is registered for it
/// @return the created node
PSNode *LLVMPointerSubgraphBuilder::createAsm(const llvm::Instruction *Inst)
{
    // report the possible unsoundness only once
    static bool alreadyWarned = false;
    if (!alreadyWarned) {
        alreadyWarned = true;
        llvm::errs() << "PTA: Inline assembly found, analysis may be unsound\n";
    }

    PSNode *unknownPtr = PS.create(PSNodeType::CONSTANT, UNKNOWN_MEMORY,
                                   Offset::UNKNOWN);

    // it is a call that returns a pointer, so we want a 'return' node
    // holding that pointer -- the node is paired with itself
    unknownPtr->setPairedNode(unknownPtr);

    addNode(Inst, unknownPtr);

    return unknownPtr;
}
/// Build the node sequence that models a memset intrinsic.
///
/// The written value is NULLPTR when the memset is a zero initialization
/// and UNKNOWN_MEMORY otherwise. It is stored to the destination memory at
/// Offset::UNKNOWN.
///
/// Created sequence: GEP -> STORE
///
/// @param Inst  the memset intrinsic call
/// @return the (first, last) nodes of the sequence; the sequence is also
///         registered for Inst via addNode()
PSNodesSeq LLVMPointerSubgraphBuilder::createMemSet(const llvm::Instruction *Inst)
{
    // a memset that does not zero the memory writes some garbage into the
    // pointers stored there, so assume unknown memory unless it is zeroing
    PSNode *written = UNKNOWN_MEMORY;
    if (memsetIsZeroInitialization(llvm::cast<llvm::IntrinsicInst>(Inst)))
        written = NULLPTR;

    PSNode *dest = getOperand(Inst->getOperand(0)->stripInBoundsOffsets());

    // the store must go to unknown offsets of the destination
    PSNode *destAnyOffset = PS.create(PSNodeType::GEP, dest, Offset::UNKNOWN);
    PSNode *store = PS.create(PSNodeType::STORE, written, destAnyOffset);

    destAnyOffset->addSuccessor(store);

    PSNodesSeq sequence = PSNodesSeq(destAnyOffset, store);
    addNode(Inst, sequence);

    return sequence;
}
/// Create the nodes that model (a part of) the initializer of a global
/// variable, descending recursively into aggregate initializers.
///
/// - a null (all-zero) constant marks the whole node as zero initialized
/// - an aggregate is processed element by element
/// - a pointer-typed constant is stored to `node` at `offset`
/// - an undef value stores UNKNOWN_MEMORY to `node` at `offset`
/// - non-pointer constant expressions, integers and floating-point
///   constants create no nodes
/// - anything else is reported and the process is aborted
///
/// @param C       the initializer (or a sub-element of it)
/// @param node    the allocation node of the global variable
/// @param last    the node after which new nodes are inserted; when it is
///                null, `node` is used
/// @param offset  offset of C inside the global variable
/// @return the last inserted node, or the (possibly defaulted) `last`
///         when nothing was inserted
PSNode *
LLVMPointerSubgraphBuilder::handleGlobalVariableInitializer(const llvm::Constant *C,
                                                            PSNodeAlloc *node,
                                                            PSNode *last,
                                                            uint64_t offset)
{
    using namespace llvm;

    if (!last)
        last = node;

    // if the global is zero initialized, just set the zeroInitialized flag
    if (C->isNullValue()) {
        node->setZeroInitialized();
    } else if (C->getType()->isAggregateType()) {
        // NOTE(review): element offsets are accumulated from the alloc
        // sizes of the preceding elements -- confirm that this matches the
        // real layout of structures whose fields are padded for alignment
        uint64_t off = 0;
        for (auto I = C->op_begin(), E = C->op_end(); I != E; ++I) {
            const Constant *op = cast<Constant>(*I);
            Type *Ty = op->getType();
            // recursively dive into the aggregate type
            last = handleGlobalVariableInitializer(op, node, last, offset + off);
            off += DL->getTypeAllocSize(Ty);
        }
    } else if (C->getType()->isPointerTy()) {
        // every pointer-typed constant ends up here, including functions
        // and pointer-typed constant expressions
        PSNode *op = getOperand(C);
        PSNode *target = PS.create(PSNodeType::CONSTANT, node, offset);
        PSNode *store = PS.create(PSNodeType::STORE, op, target);
        store->insertAfter(last);
        last = store;
    } else if (isa<UndefValue>(C)) {
        // undef value means unknown memory
        PSNode *target = PS.create(PSNodeType::CONSTANT, node, offset);
        PSNode *store = PS.create(PSNodeType::STORE, UNKNOWN_MEMORY, target);
        store->insertAfter(last);
        last = store;
    } else if (!isa<ConstantExpr>(C) && !isa<ConstantInt>(C)
               && !isa<ConstantFP>(C)) {
        // Non-pointer constant expressions, integers and floating-point
        // values create no nodes; any other kind of constant is not
        // supported.
        // NOTE(review): a non-pointer constant expression could still be
        // derived from a pointer (e.g. a pointer-to-integer cast) --
        // confirm that ignoring it is intended
        llvm::errs() << *C << "\n";
        llvm::errs() << "ERROR: ^^^ global variable initializer not handled\n";
        abort();
    }

    return last;
}
/// Process a MEMCPY node: copy the pointers stored in the memory objects
/// of the source operand (operand 0) into the memory objects of the
/// destination operand (operand 1), restricted to the range given by the
/// node's offset and len.
///
/// @param node  the MEMCPY node
/// @return true when any points-to information (or the zero-initialized
///         flag of the destination) has changed
bool PointerAnalysis::processMemcpy(PSNode *node)
{
    PSNode *srcNode = node->getOperand(0);
    PSNode *destNode = node->getOperand(1);

    // is the memory copied as a whole (or possibly as a whole)?
    const auto copiesWholeMemory = [node]() {
        return (*node->offset == 0 && node->len.isUnknown())
               || node->offset.isUnknown();
    };

    // collect the memory objects for every non-null pointer of `from`
    const auto collectObjects = [this, node](PSNode *from,
                                             std::vector<MemoryObject *>& objects) {
        for (const Pointer& ptr : from->pointsTo) {
            assert(ptr.target && "Got nullptr as target");

            if (ptr.isNull())
                continue;

            getMemoryObjects(node, ptr, objects);
        }
    };

    bool changed = false;

    // if the source is zero initialized and we copy it whole,
    // set the destination zero initialized too
    if (!destNode->isZeroInitialized() && srcNode->isZeroInitialized()
        && copiesWholeMemory()) {
        destNode->setZeroInitialized();
        changed = true;
    }

    // what to copy
    std::vector<MemoryObject *> srcObjects;
    collectObjects(srcNode, srcObjects);

    // where to copy
    std::vector<MemoryObject *> destObjects;
    collectObjects(destNode, destObjects);

    if (srcObjects.empty()) {
        // zero initialized memory is fine - it just yields nullptr;
        // otherwise there is nothing to copy from, which is an error
        if (srcNode->isZeroInitialized())
            changed |= node->addPointsTo(NULLPTR);
        else
            changed |= errorEmptyPointsTo(node, srcNode);

        return changed;
    }

    for (MemoryObject *destObject : destObjects) {
        // copy every pointer from the source objects that lies in the
        // copied range into this object
        for (MemoryObject *srcObject : srcObjects) {
            for (auto& src : srcObject->pointsTo) {
                // src.first is the offset, src.second is the points-to set

                // pointers at an unknown offset are copied always, and so
                // is everything when the offset of the copy is unknown;
                // otherwise filter by the copied range
                if (!src.first.isUnknown() && !node->offset.isUnknown()) {
                    if (node->len.isUnknown()) {
                        if (*src.first < *node->offset)
                            continue;
                    } else if (!src.first.inRange(*node->offset,
                                                  *node->offset + *node->len - 1)) {
                        continue;
                    }
                }

                changed |= destObject->addPointsTo(src.first, src.second);
            }
        }

        // Take care of a source that is zero initialized, but also
        // points somewhere. Imagine this:
        //
        //   struct s { ptr1, ptr2, ptr3 };
        //   struct s1 = {0};        /* s1 is zero initialized */
        //   struct s1.ptr1 = &a;
        //   struct s2;
        //   memcpy(s1, s2, 0, 16);  /* copy first two pointers */
        //
        // Here s2 will point to 'a' at offset 0, but it won't point to
        // null at offset 8, although it should. Fix that by adding
        // nullptr at UNKNOWN_OFFSET (we may lose precision, but we stay
        // sound).
        if (srcNode->isZeroInitialized() && !copiesWholeMemory())
            changed |= destObject->addPointsTo(UNKNOWN_OFFSET, NULLPTR);
    }

    return changed;
}