void BoundFactsCalculator::ThunkBound( BoundFact*& curFact, Node::Ptr src, Node::Ptr trg, bool &newCopy) { // This function checks whether any found thunk is between // the src node and the trg node. If there is any, then we have // extra bound information to be added. ParseAPI::Block *srcBlock; Address srcAddr = 0; if (src == Node::Ptr()) srcBlock = func->entry(); else { SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(src); srcBlock = srcNode->block(); srcAddr = srcNode->addr(); } SliceNode::Ptr trgNode = boost::static_pointer_cast<SliceNode>(trg); ParseAPI::Block *trgBlock = trgNode->block(); Address trgAddr = trgNode->addr(); bool first = true; for (auto tit = thunks.begin(); tit != thunks.end(); ++tit) { ParseAPI::Block* thunkBlock = tit->second.block; parsing_printf("\t\tCheck srcAddr at %lx, trgAddr at %lx, thunk at %lx\n", srcAddr, trgAddr, tit->first); if (src != Node::Ptr()) { if (srcBlock == thunkBlock) { if (srcAddr > tit->first) continue; } else { if (rf.thunk_ins[thunkBlock].find(srcBlock) == rf.thunk_ins[thunkBlock].end()) continue; } } if (trgBlock == thunkBlock) { if (trgAddr < tit->first) continue; } else { if (rf.thunk_outs[thunkBlock].find(trgBlock) == rf.thunk_outs[thunkBlock].end()) continue; } parsing_printf("\t\t\tfind thunk at %lx between the source and the target. Add fact", tit->first); BoundValue *bv = new BoundValue(tit->second.value); bv->Print(); if (first && !newCopy) { newCopy = true; curFact = new BoundFact(*curFact); } curFact->GenFact(VariableAST::create(Variable(AbsRegion(Absloc(tit->second.reg)))), bv, false); first = false; } }
void BoundFactsCalculator::DetermineAnalysisOrder() { NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); nodeColor.clear(); reverseOrder.clear(); analysisOrder.clear(); for (; nbegin != nend; ++nbegin) if (nodeColor.find(*nbegin) == nodeColor.end()) { NaturalDFS(*nbegin); } nodeColor.clear(); orderStamp = 0; for (auto nit = reverseOrder.rbegin(); nit != reverseOrder.rend(); ++nit) if (nodeColor.find(*nit) == nodeColor.end()) { ++orderStamp; ReverseDFS(*nit); } // Create a virtual entry node that has // edges into all entry SCCs SliceNode::Ptr virtualEntry = SliceNode::create(Assignment::Ptr(), func->entry(), func); analysisOrder[virtualEntry] = 0; for (int curOrder = 1; curOrder <= orderStamp; ++curOrder) { // First determine all nodes in this SCC vector<Node::Ptr> curNodes; NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); for (; nbegin != nend; ++nbegin) { if (analysisOrder[*nbegin] == curOrder) { curNodes.push_back(*nbegin); } } // If this SCC does not have any outside edge, // it is an entry SCC and we need to connect // the virtual entry to it if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) { if (curNodes.size() == 1) { // If the SCC has only one node, // we connect the virtual entry to this single node SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*(curNodes.begin())); slice->insertPair(virtualEntry, node, TypedSliceEdge::create(virtualEntry, node, FALLTHROUGH)); } else { // If there are more than one node in this SCC, // we do a DFS to see which nodes in the SCC can be // reached from the entry of the function without passing // through other nodes in the SCC. 
// Basically, we only connect edges from the virtual entry // to the entries of the SCC set<ParseAPI::Block*> visit; map<ParseAPI::Block*, vector<SliceNode::Ptr> >targetMap; for (auto nit = curNodes.begin(); nit != curNodes.end(); ++nit) { SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*nit); ParseAPI::Block * b = node->block(); Address addr = node->addr(); if (targetMap.find(b) == targetMap.end()) { targetMap[b].push_back(node); } else if (targetMap[b][0]->addr() > addr) { targetMap[b].clear(); targetMap[b].push_back(node); } else if (targetMap[b][0]->addr() == addr) { targetMap[b].push_back(node); } } BuildEdgeFromVirtualEntry(virtualEntry, virtualEntry->block(), targetMap, visit, slice); } } } slice->clearEntryNodes(); slice->markAsEntryNode(virtualEntry); }
// Apply the transfer function of slice node curNode to newFact in place.
//
// curNode - the slice node whose assignment is being applied; nodes without
//           an assignment leave newFact untouched.
// newFact - on entry the fact that holds before the node, on return the fact
//           that holds after it.
//
// Processing order:
//   1. An assignment to the zero flag only records a predicate.
//   2. If the assignment cannot be expanded to an AST it is treated as the
//      identity function, except that ptest invalidates the predicate.
//   3. xchg, push-of-constant, pop and SETcc have dedicated handling.
//   4. Otherwise the output either receives the bound computed for the
//      expression or has its fact killed; aliasing/relations are then
//      updated and used to derive a potentially stricter bound.
void BoundFactsCalculator::CalcTransferFunction(Node::Ptr curNode, BoundFact *newFact){
    SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
    Assignment::Ptr assign = node->assign();
    if (!assign) return;

    // Every normal exit of the general path ends with the same trace output.
    auto printOutputFacts = [newFact]() {
        parsing_printf("\t\t\tCalculating transfer function: Output facts\n");
        newFact->Print();
    };

    AbsRegion &ar = assign->out();

    if (ar.absloc().type() == Absloc::Register &&
        (ar.absloc().reg() == x86::zf || ar.absloc().reg() == x86_64::zf)) {
        // zf should be only predecessor of this node
        parsing_printf("\t\tThe predecessor node is zf assignment!\n");
        newFact->SetPredicate(assign, ExpandAssignment(assign));
        return;
    }

    entryID id = assign->insn()->getOperation().getID();
    // The predecessor is not a conditional jump,
    // so the bound fact can be determined from the source assignment.
    parsing_printf("\t\tThe predecessor node is normal node\n");
    parsing_printf("\t\t\tentry id %d\n", id);

    pair<AST::Ptr, bool> expandRet = ExpandAssignment(assign);
    AST::Ptr calculation = expandRet.first;
    if (calculation == NULL) {
        parsing_printf("\t\t\t No semantic support for this instruction. Assume it does not affect jump target calculation. Ignore it (Treat as identity function) except for ptest. ptest should kill the current predicate\n");
        if (id == e_ptest) {
            parsing_printf("\t\t\t\tptest instruction, kill predciate.\n");
            newFact->pred.valid = false;
        }
        return;
    }
    parsing_printf("\tAST: %s\n", calculation->format().c_str());

    BoundCalcVisitor bcv(*newFact, node->block(), handleOneByteRead);
    calculation->accept(&bcv);

    // If the instruction writes memory, we need the AST that represents the
    // memory access and the address. When the AbsRegion represents memory,
    // its generator is set (during symbolic expansion) to the AST of the
    // memory address. When the AbsRegion represents a register, the generator
    // is not set.
    AST::Ptr outAST;
    if (ar.generator() != NULL) {
        outAST = SimplifyAnAST(RoseAST::create(ROSEOperation(ROSEOperation::derefOp, ar.size()), ar.generator()),
                               assign->insn()->size());
    } else {
        outAST = VariableAST::create(Variable(ar));
    }

    // bsf/bsr are deliberately NOT given the naive bound
    // [0, 8 * operand size - 1]. In practice, especially in libc, the real
    // bound is usually smaller than the size of the source operand:
    //   Ex 1: shl %cl,%edx
    //         bsf %rdx,%rcx
    //     rcx is in [0,31] rather than [0,63] even though rdx has 64 bits.
    //   Ex 2: pmovmskb %xmm0,%edx
    //         bsf %rdx,%rdx
    //     rdx is in [0,15] because pmovmskb only sets the least significant
    //     16 bits.
    // Over-approximating the bound can lead to bogus control flow that causes
    // overlapping blocks or functions, so the operand has to be analysed
    // further instead of concluding the bound directly. The former rule was:
    //   if (id == e_bsf || id == e_bsr) {
    //       int size = node->assign()->insn()->getOperand(0).getValue()->size();
    //       newFact->GenFact(outAST, new BoundValue(StridedInterval(1,0, size * 8 - 1)), false);
    //       ... print output facts and return ...
    //   }

    if (id == e_xchg) {
        newFact->SwapFact(calculation, outAST);
        printOutputFacts();
        return;
    }

    // A push is only special-cased when a constant is pushed; any other push
    // falls through to the general handling below.
    if (id == e_push && calculation->getID() == AST::V_ConstantAST) {
        ConstantAST::Ptr c = boost::static_pointer_cast<ConstantAST>(calculation);
        newFact->PushAConst(c->val().val);
        printOutputFacts();
        return;
    }

    // Likewise a pop is only special-cased when PopAConst succeeds.
    if (id == e_pop && newFact->PopAConst(outAST)) {
        printOutputFacts();
        return;
    }

    // Assume all SETxx entry ids are contiguous
    if (id >= e_setb && id <= e_setz) {
        newFact->GenFact(outAST, new BoundValue(StridedInterval(1,0,1)), false);
        printOutputFacts();
        return;
    }

    if (bcv.IsResultBounded(calculation)) {
        parsing_printf("\t\t\tGenerate bound fact for %s\n", outAST->format().c_str());
        newFact->GenFact(outAST, new BoundValue(*bcv.GetResultBound(calculation)), false);
    } else {
        parsing_printf("\t\t\tKill bound fact for %s\n", outAST->format().c_str());
        newFact->KillFact(outAST, false);
    }

    if (calculation->getID() == AST::V_VariableAST) {
        // We only track aliasing between registers
        parsing_printf("\t\t\t%s and %s are equal\n", calculation->format().c_str(), outAST->format().c_str());
        newFact->InsertRelation(calculation, outAST, BoundFact::Equal);
    }
    newFact->AdjustPredicate(outAST, calculation);

    // Now try to track all aliasing.
    // Currently, all variables in the slice are represented as an AST that
    // consists of input variables to the slice (the variables whose value
    // sources we do not know).
    newFact->TrackAlias(DeepCopyAnAST(calculation), outAST);

    // Apply tracking relations to the calculation to generate a
    // potentially stricter bound
    BoundValue *strictValue = newFact->ApplyRelations(outAST);
    if (strictValue != NULL) {
        parsing_printf("\t\t\tGenerate stricter bound fact for %s\n", outAST->format().c_str());
        newFact->GenFact(outAST, strictValue, false);
    }

    printOutputFacts();
}
bool BoundFactsCalculator::CalculateBoundedFacts() { /* We use a dataflow analysis to calculate the value bound * of each register and potentially some memory locations. * The key steps of the dataflow analysis are * 1. Determine the analysis order: * First calculate all strongly connected components (SCC) * of the graph. The flow analysis inside a SCC is * iterative. The flow analysis between several SCCs * is done topologically. * 2. For each node, need to calculate the meet and * calculate the transfer function. * 1. The meet should be simply an intersection of all the bounded facts * along all paths. * 2. To calculate the transfer function, we first get the symbolic expression * of the instrution for the node. Then depending on the instruction operation * and the meet result, we know what are still bounded. For example, loading * memory is always unbounded; doing and operation on a register with a constant * makes the register bounded. */ DetermineAnalysisOrder(); queue<Node::Ptr> workingList; unordered_set<Node::Ptr, Node::NodePtrHasher> inQueue; unordered_map<Node::Ptr, int, Node::NodePtrHasher> inQueueLimit; for (int curOrder = 0; curOrder <= orderStamp; ++curOrder) { // We first determine which nodes are // in this SCC vector<Node::Ptr> curNodes; NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); for (; nbegin != nend; ++nbegin) { if (analysisOrder[*nbegin] == curOrder) { curNodes.push_back(*nbegin); workingList.push(*nbegin); inQueue.insert(*nbegin); } } if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) { // If this SCC is an entry SCC, // we choose a node inside the SCC // and let it be top. 
// This should only contain the virtual entry node parsing_printf("This SCC does not incoming edges from outside\n"); boundFactsIn[curNodes[0]] = new BoundFact(); boundFactsOut[curNodes[0]] = new BoundFact(); } parsing_printf("Starting analysis inside SCC %d\n", curOrder); // We now start iterative analysis inside the SCC while (!workingList.empty()) { // We get the current node Node::Ptr curNode = workingList.front(); workingList.pop(); inQueue.erase(curNode); SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode); ++inQueueLimit[curNode]; if (inQueueLimit[curNode] > IN_QUEUE_LIMIT) continue; BoundFact* oldFactIn = GetBoundFactIn(curNode); parsing_printf("Calculate Meet for %lx", node->addr()); if (node->assign()) { parsing_printf(", insn: %s\n", node->assign()->insn()->format().c_str()); } else { if (node->block() == NULL) parsing_printf(", the VirtualExit node\n"); else parsing_printf(", the VirtualEntry node\n"); } parsing_printf("\tOld fact for %lx:\n", node->addr()); if (oldFactIn == NULL) parsing_printf("\t\t do not exist\n"); else oldFactIn->Print(); // We find all predecessors of the current node // and calculates the union of the analysis results // from the predecessors BoundFact* newFactIn = Meet(curNode); parsing_printf("\tNew fact at %lx\n", node->addr()); if (newFactIn != NULL) newFactIn->Print(); else parsing_printf("\t\tNot calculated\n"); // If the current node has not been calcualted yet, // or the new meet results are different from the // old ones, we keep the new results if (newFactIn != NULL && (oldFactIn == NULL || *oldFactIn != *newFactIn)) { parsing_printf("\tFacts change!\n"); if (oldFactIn != NULL) delete oldFactIn; boundFactsIn[curNode] = newFactIn; BoundFact* newFactOut = new BoundFact(*newFactIn); // The current node has a transfer function // that changes the analysis results CalcTransferFunction(curNode, newFactOut); if (boundFactsOut.find(curNode) != boundFactsOut.end() && boundFactsOut[curNode] != NULL) delete 
boundFactsOut[curNode]; boundFactsOut[curNode] = newFactOut; curNode->outs(nbegin, nend); for (; nbegin != nend; ++nbegin) // We only add node inside current SCC into the working list if (inQueue.find(*nbegin) == inQueue.end() && analysisOrder[*nbegin] == curOrder) { workingList.push(*nbegin); inQueue.insert(*nbegin); } } else { if (newFactIn != NULL) delete newFactIn; parsing_printf("\tFacts do not change!\n"); } } } return true; }
// Expand the assignment of slice node ptr into an AST, substitute the already
// expanded definitions of its inputs, simplify the result and store it in
// dbase under ptr->assign().
//
// ptr       - the slice node to process.
// dbase     - map from assignment to expanded AST; read for the inputs and
//             written for ptr's own assignment.
// skipEdges - incoming edges that must be ignored (used to break cycles).
//
// Returns
//   WIDEN_NODE         if ptr has no assignment (nothing is stored),
//   FAILED_TRANSLATION if SymEval::expand reported a failed translation,
//   SKIPPED_INPUT      if an edge was skipped or an input had conflicting
//                      definitions,
//   SUCCESS            if at least one substitution was performed,
//   FAILED             otherwise.
SymEval::Retval_t SymEval::process(SliceNode::Ptr ptr, Result_t &dbase, std::set<Edge::Ptr> &skipEdges) {
    typedef std::map<const AbsRegion*, std::set<Assignment::Ptr> > InputMap;

    bool failedTranslation = false;
    bool skippedEdge = false;
    bool skippedInput = false;
    bool success = false;
    InputMap inputMap;

    expand_cerr << "Calling process on " << ptr->format() << endl;

    // Don't try an expansion of a widen node...
    if (!ptr->assign()) return WIDEN_NODE;

    // Group the defining assignments of all incoming edges by the region the
    // edge carries.
    EdgeIterator begin, end;
    ptr->ins(begin, end);
    for (; begin != end; ++begin) {
        SliceEdge::Ptr edge = boost::static_pointer_cast<SliceEdge>(*begin);
        SliceNode::Ptr source = boost::static_pointer_cast<SliceNode>(edge->source());

        // Skip this one to break a cycle.
        if (skipEdges.find(edge) != skipEdges.end()) {
            expand_cerr << "In process, skipping edge from " << source->format() << endl;
            skippedEdge = true;
            continue;
        }

        Assignment::Ptr assign = source->assign();
        if (!assign) continue; // widen node

        expand_cerr << "Assigning input " << edge->data().format()
                    << " from assignment " << assign->format() << endl;
        inputMap[&edge->data()].insert(assign);
    }

    expand_cerr << "\t Input map has size " << inputMap.size() << endl;

    // All of the expanded inputs are in the parameter dbase
    // If not (like this one), add it
    AST::Ptr ast;
    boost::tie(ast, failedTranslation) = SymEval::expand(ptr->assign());
    // expand_cerr << "\t ... resulting in " << dbase.format() << endl;

    // We have an AST. Now substitute in all of its predecessors.
    for (InputMap::iterator iter = inputMap.begin(); iter != inputMap.end(); ++iter) {
        // If we have multiple secondary definitions, we:
        //   if all definitions are equal, use the first
        //   otherwise, use nothing
        AST::Ptr definition;
        for (std::set<Assignment::Ptr>::iterator iter2 = iter->second.begin();
             iter2 != iter->second.end(); ++iter2) {
            AST::Ptr newDef = dbase[*iter2];
            if (!definition) {
                definition = newDef;
                continue;
            } else if (definition->equals(newDef)) {
                continue;
            } else {
                // Not equal
                definition = AST::Ptr();
                skippedInput = true;
                break;
            }
        }

        // The region used by the current assignment...
        const AbsRegion &reg = *iter->first;

        // Create an AST around this one
        VariableAST::Ptr use = VariableAST::create(Variable(reg, ptr->addr()));

        if (!definition) {
            // Can happen if we're expanding out of order, and is generally harmless.
            continue;
        }

        expand_cerr << "Before substitution: " << (ast ? ast->format() : "<NULL AST>") << endl;

        if (!ast) {
            expand_cerr << "Skipping substitution because of null AST" << endl;
        } else {
            ast = AST::substitute(ast, use, definition);
            success = true;
        }
        expand_cerr << "\t result is " << (ast ? ast->format() : "<NULL AST>") << endl;
    }
    expand_cerr << "Result of substitution: " << ptr->assign()->format() << " == "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    // And attempt simplification again
    ast = simplifyStack(ast, ptr->addr(), ptr->func(), ptr->block());
    expand_cerr << "Result of post-substitution simplification: " << ptr->assign()->format() << " == "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    dbase[ptr->assign()] = ast;

    if (failedTranslation) return FAILED_TRANSLATION;
    else if (skippedEdge || skippedInput) return SKIPPED_INPUT;
    else if (success) return SUCCESS;
    else return FAILED;
}