void BoundFactsCalculator::ThunkBound( BoundFact*& curFact, Node::Ptr src, Node::Ptr trg, bool &newCopy) { // This function checks whether any found thunk is between // the src node and the trg node. If there is any, then we have // extra bound information to be added. ParseAPI::Block *srcBlock; Address srcAddr = 0; if (src == Node::Ptr()) srcBlock = func->entry(); else { SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(src); srcBlock = srcNode->block(); srcAddr = srcNode->addr(); } SliceNode::Ptr trgNode = boost::static_pointer_cast<SliceNode>(trg); ParseAPI::Block *trgBlock = trgNode->block(); Address trgAddr = trgNode->addr(); bool first = true; for (auto tit = thunks.begin(); tit != thunks.end(); ++tit) { ParseAPI::Block* thunkBlock = tit->second.block; parsing_printf("\t\tCheck srcAddr at %lx, trgAddr at %lx, thunk at %lx\n", srcAddr, trgAddr, tit->first); if (src != Node::Ptr()) { if (srcBlock == thunkBlock) { if (srcAddr > tit->first) continue; } else { if (rf.thunk_ins[thunkBlock].find(srcBlock) == rf.thunk_ins[thunkBlock].end()) continue; } } if (trgBlock == thunkBlock) { if (trgAddr < tit->first) continue; } else { if (rf.thunk_outs[thunkBlock].find(trgBlock) == rf.thunk_outs[thunkBlock].end()) continue; } parsing_printf("\t\t\tfind thunk at %lx between the source and the target. Add fact", tit->first); BoundValue *bv = new BoundValue(tit->second.value); bv->Print(); if (first && !newCopy) { newCopy = true; curFact = new BoundFact(*curFact); } curFact->GenFact(VariableAST::create(Variable(AbsRegion(Absloc(tit->second.reg)))), bv, false); first = false; } }
void BoundFactsCalculator::DetermineAnalysisOrder() { NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); nodeColor.clear(); reverseOrder.clear(); analysisOrder.clear(); for (; nbegin != nend; ++nbegin) if (nodeColor.find(*nbegin) == nodeColor.end()) { NaturalDFS(*nbegin); } nodeColor.clear(); orderStamp = 0; for (auto nit = reverseOrder.rbegin(); nit != reverseOrder.rend(); ++nit) if (nodeColor.find(*nit) == nodeColor.end()) { ++orderStamp; ReverseDFS(*nit); } // Create a virtual entry node that has // edges into all entry SCCs SliceNode::Ptr virtualEntry = SliceNode::create(Assignment::Ptr(), func->entry(), func); analysisOrder[virtualEntry] = 0; for (int curOrder = 1; curOrder <= orderStamp; ++curOrder) { // First determine all nodes in this SCC vector<Node::Ptr> curNodes; NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); for (; nbegin != nend; ++nbegin) { if (analysisOrder[*nbegin] == curOrder) { curNodes.push_back(*nbegin); } } // If this SCC does not have any outside edge, // it is an entry SCC and we need to connect // the virtual entry to it if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) { if (curNodes.size() == 1) { // If the SCC has only one node, // we connect the virtual entry to this single node SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*(curNodes.begin())); slice->insertPair(virtualEntry, node, TypedSliceEdge::create(virtualEntry, node, FALLTHROUGH)); } else { // If there are more than one node in this SCC, // we do a DFS to see which nodes in the SCC can be // reached from the entry of the function without passing // through other nodes in the SCC. 
// Basically, we only connect edges from the virtual entry // to the entries of the SCC set<ParseAPI::Block*> visit; map<ParseAPI::Block*, vector<SliceNode::Ptr> >targetMap; for (auto nit = curNodes.begin(); nit != curNodes.end(); ++nit) { SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*nit); ParseAPI::Block * b = node->block(); Address addr = node->addr(); if (targetMap.find(b) == targetMap.end()) { targetMap[b].push_back(node); } else if (targetMap[b][0]->addr() > addr) { targetMap[b].clear(); targetMap[b].push_back(node); } else if (targetMap[b][0]->addr() == addr) { targetMap[b].push_back(node); } } BuildEdgeFromVirtualEntry(virtualEntry, virtualEntry->block(), targetMap, visit, slice); } } } slice->clearEntryNodes(); slice->markAsEntryNode(virtualEntry); }
// Computes the meet of the outgoing facts of all predecessors of curNode.
//
// For every incoming edge whose source already has an outgoing fact, a
// private copy of that fact is made, the edge-specific adjustments are
// applied (entry-block rax assumption, conditional-jump predicate, thunk
// bounds), and the result is folded into the accumulated fact.
//
// Returns a newly allocated BoundFact, or NULL when no predecessor has been
// calculated yet (including the case of a node without incoming edges).
BoundFact* BoundFactsCalculator::Meet(Node::Ptr curNode) {
    SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);

    EdgeIterator gbegin, gend;
    curNode->ins(gbegin, gend);

    BoundFact* newFact = NULL;
    bool first = true;

    for (; gbegin != gend; ++gbegin) {
        TypedSliceEdge::Ptr edge = boost::static_pointer_cast<TypedSliceEdge>(*gbegin);
        SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(edge->source());

        BoundFact *prevFact = GetBoundFactOut(srcNode);
        if (prevFact == NULL) {
            parsing_printf("\t\tIncoming node %lx has not been calculated yet, ignore it\n", srcNode->addr());
            continue;
        }

        // Work on a private copy: the bound fact of the predecessor must not
        // be overwritten. This is the only copy made for this edge; the
        // branches below modify it in place (copying it again there would
        // leak this allocation).
        prevFact = new BoundFact(*prevFact);
        bool newCopy = true;

        parsing_printf("\t\tMeet incoming edge from %lx\n", srcNode->addr());
        parsing_printf("\t\tThe fact from %lx before applying transfer function\n", srcNode->addr());
        prevFact->Print();

        if (!srcNode->assign()) {
            parsing_printf("\t\tThe predecessor node is the virtual entry ndoe\n");
            if (firstBlock) {
                // If the indirect jump is in the entry block
                // of the function, we assume that rax is in
                // range [0,8] for analyzing the movaps table.
                // NEED TO HAVE A SAFE WAY TO DO THIS!!
                parsing_printf("\t\tApplying entry block rax assumption!\n");
                AST::Ptr axAST;
                if (func->entry()->obj()->cs()->getAddressWidth() == 8)
                    axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86_64::rax))));
                else
                    // DOES THIS REALLY SHOW UP IN 32-BIT CODE???
                    axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86::eax))));
                prevFact->GenFact(axAST, new BoundValue(StridedInterval(1,0,8)), false);
            }
        } else if (srcNode->assign() && IsConditionalJump(srcNode->assign()->insn())) {
            // If the predecessor is a conditional jump,
            // we can determine bound fact based on the predicate and the edge type
            parsing_printf("\t\tThe predecessor node is a conditional jump!\n");
            if (!prevFact->ConditionalJumpBound(srcNode->assign()->insn(), edge->type())) {
                fprintf(stderr, "From %lx to %lx\n", srcNode->addr(), node->addr());
                assert(0);
            }
        }

        ThunkBound(prevFact, srcNode, node, newCopy);

        parsing_printf("\t\tFact from %lx after applying transfer function\n", srcNode->addr());
        prevFact->Print();

        if (first) {
            // For the first incoming dataflow fact,
            // we just copy it.
            // We can do this because if an a-loc
            // is missing in the fact map, we assume
            // the a-loc is top.
            first = false;
            if (newCopy) newFact = prevFact;
            else newFact = new BoundFact(*prevFact);
        } else {
            newFact->Meet(*prevFact);
            // The per-edge copy has been merged and is no longer needed.
            if (newCopy) delete prevFact;
        }
    }
    return newFact;
}
bool BoundFactsCalculator::CalculateBoundedFacts() { /* We use a dataflow analysis to calculate the value bound * of each register and potentially some memory locations. * The key steps of the dataflow analysis are * 1. Determine the analysis order: * First calculate all strongly connected components (SCC) * of the graph. The flow analysis inside a SCC is * iterative. The flow analysis between several SCCs * is done topologically. * 2. For each node, need to calculate the meet and * calculate the transfer function. * 1. The meet should be simply an intersection of all the bounded facts * along all paths. * 2. To calculate the transfer function, we first get the symbolic expression * of the instrution for the node. Then depending on the instruction operation * and the meet result, we know what are still bounded. For example, loading * memory is always unbounded; doing and operation on a register with a constant * makes the register bounded. */ DetermineAnalysisOrder(); queue<Node::Ptr> workingList; unordered_set<Node::Ptr, Node::NodePtrHasher> inQueue; unordered_map<Node::Ptr, int, Node::NodePtrHasher> inQueueLimit; for (int curOrder = 0; curOrder <= orderStamp; ++curOrder) { // We first determine which nodes are // in this SCC vector<Node::Ptr> curNodes; NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); for (; nbegin != nend; ++nbegin) { if (analysisOrder[*nbegin] == curOrder) { curNodes.push_back(*nbegin); workingList.push(*nbegin); inQueue.insert(*nbegin); } } if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) { // If this SCC is an entry SCC, // we choose a node inside the SCC // and let it be top. 
// This should only contain the virtual entry node parsing_printf("This SCC does not incoming edges from outside\n"); boundFactsIn[curNodes[0]] = new BoundFact(); boundFactsOut[curNodes[0]] = new BoundFact(); } parsing_printf("Starting analysis inside SCC %d\n", curOrder); // We now start iterative analysis inside the SCC while (!workingList.empty()) { // We get the current node Node::Ptr curNode = workingList.front(); workingList.pop(); inQueue.erase(curNode); SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode); ++inQueueLimit[curNode]; if (inQueueLimit[curNode] > IN_QUEUE_LIMIT) continue; BoundFact* oldFactIn = GetBoundFactIn(curNode); parsing_printf("Calculate Meet for %lx", node->addr()); if (node->assign()) { parsing_printf(", insn: %s\n", node->assign()->insn()->format().c_str()); } else { if (node->block() == NULL) parsing_printf(", the VirtualExit node\n"); else parsing_printf(", the VirtualEntry node\n"); } parsing_printf("\tOld fact for %lx:\n", node->addr()); if (oldFactIn == NULL) parsing_printf("\t\t do not exist\n"); else oldFactIn->Print(); // We find all predecessors of the current node // and calculates the union of the analysis results // from the predecessors BoundFact* newFactIn = Meet(curNode); parsing_printf("\tNew fact at %lx\n", node->addr()); if (newFactIn != NULL) newFactIn->Print(); else parsing_printf("\t\tNot calculated\n"); // If the current node has not been calcualted yet, // or the new meet results are different from the // old ones, we keep the new results if (newFactIn != NULL && (oldFactIn == NULL || *oldFactIn != *newFactIn)) { parsing_printf("\tFacts change!\n"); if (oldFactIn != NULL) delete oldFactIn; boundFactsIn[curNode] = newFactIn; BoundFact* newFactOut = new BoundFact(*newFactIn); // The current node has a transfer function // that changes the analysis results CalcTransferFunction(curNode, newFactOut); if (boundFactsOut.find(curNode) != boundFactsOut.end() && boundFactsOut[curNode] != NULL) delete 
boundFactsOut[curNode]; boundFactsOut[curNode] = newFactOut; curNode->outs(nbegin, nend); for (; nbegin != nend; ++nbegin) // We only add node inside current SCC into the working list if (inQueue.find(*nbegin) == inQueue.end() && analysisOrder[*nbegin] == curOrder) { workingList.push(*nbegin); inQueue.insert(*nbegin); } } else { if (newFactIn != NULL) delete newFactIn; parsing_printf("\tFacts do not change!\n"); } } } return true; }
// Expands the assignment of a slice node and substitutes the already
// expanded definitions of its predecessors into the resulting AST.
//
//   ptr       - node to process; a node without an assignment is treated as
//               a widen node and is not expanded.
//   dbase     - maps assignments to their expanded ASTs; read for the
//               predecessors and written for ptr's own assignment.
//   skipEdges - incoming edges to ignore (used to break cycles).
//
// Returns WIDEN_NODE for a node without an assignment, FAILED_TRANSLATION
// when expand() reports a failed translation, SKIPPED_INPUT when an edge was
// skipped or an input had conflicting definitions, SUCCESS when at least one
// substitution was performed, and FAILED otherwise.
SymEval::Retval_t SymEval::process(SliceNode::Ptr ptr,
                                   Result_t &dbase,
                                   std::set<Edge::Ptr> &skipEdges) {
    bool failedTranslation;
    bool skippedEdge = false;
    bool skippedInput = false;
    bool success = false;

    // For each input region, the set of assignments that define it.
    std::map<const AbsRegion*, std::set<Assignment::Ptr> > inputMap;

    expand_cerr << "Calling process on " << ptr->format() << endl;

    // Don't try an expansion of a widen node...
    if (!ptr->assign()) return WIDEN_NODE;

    EdgeIterator begin, end;
    ptr->ins(begin, end);

    for (; begin != end; ++begin) {
        SliceEdge::Ptr edge = boost::static_pointer_cast<SliceEdge>(*begin);
        SliceNode::Ptr source = boost::static_pointer_cast<SliceNode>(edge->source());

        // Skip this one to break a cycle.
        if (skipEdges.find(edge) != skipEdges.end()) {
            expand_cerr << "In process, skipping edge from "
                        << source->format() << endl;
            skippedEdge = true;
            continue;
        }

        Assignment::Ptr assign = source->assign();
        if (!assign) continue; // widen node

        expand_cerr << "Assigning input " << edge->data().format()
                    << " from assignment " << assign->format() << endl;
        inputMap[&edge->data()].insert(assign);
    }

    expand_cerr << "\t Input map has size " << inputMap.size() << endl;

    // All of the expanded inputs are in the parameter dbase
    // If not (like this one), add it

    AST::Ptr ast;
    boost::tie(ast, failedTranslation) = SymEval::expand(ptr->assign());
    // expand_cerr << "\t ... resulting in " << dbase.format() << endl;

    // We have an AST. Now substitute in all of its predecessors.
    for (std::map<const AbsRegion*, std::set<Assignment::Ptr> >::iterator iter = inputMap.begin();
         iter != inputMap.end(); ++iter) {
        // If we have multiple secondary definitions, we:
        //   if all definitions are equal, use the first
        //   otherwise, use nothing
        AST::Ptr definition;

        for (std::set<Assignment::Ptr>::iterator iter2 = iter->second.begin();
             iter2 != iter->second.end(); ++iter2) {
            AST::Ptr newDef = dbase[*iter2];
            if (!definition) {
                definition = newDef;
                continue;
            } else if (definition->equals(newDef)) {
                continue;
            } else {
                // Not equal
                definition = AST::Ptr();
                skippedInput = true;
                break;
            }
        }

        // The region used by the current assignment...
        const AbsRegion &reg = *iter->first;

        // Create an AST around this one
        VariableAST::Ptr use = VariableAST::create(Variable(reg, ptr->addr()));

        if (!definition) {
            // Can happen if we're expanding out of order, and is generally harmless.
            continue;
        }

        expand_cerr << "Before substitution: " << (ast ? ast->format() : "<NULL AST>") << endl;

        if (!ast) {
            expand_cerr << "Skipping substitution because of null AST" << endl;
        } else {
            ast = AST::substitute(ast, use, definition);
            success = true;
        }
        expand_cerr << "\t result is " << (ast ? ast->format() : "<NULL AST>") << endl;
    }
    expand_cerr << "Result of substitution: " << ptr->assign()->format() << " == "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    // And attempt simplification again
    ast = simplifyStack(ast, ptr->addr(), ptr->func(), ptr->block());
    expand_cerr << "Result of post-substitution simplification: " << ptr->assign()->format() << " == "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    dbase[ptr->assign()] = ast;

    // Report the most significant condition first.
    if (failedTranslation) return FAILED_TRANSLATION;
    else if (skippedEdge || skippedInput) return SKIPPED_INPUT;
    else if (success) return SUCCESS;
    else return FAILED;
}