// implements < , <= causes failures when used to sort Windows vectors bool vectorSort(SliceNode::Ptr ptr1, SliceNode::Ptr ptr2) { AssignmentPtr assign1 = ptr1->assign(); AssignmentPtr assign2 = ptr2->assign(); if (!assign2) return false; else if (!assign1) return true; Address addr1 = assign1->addr(); Address addr2 = assign2->addr(); if (addr1 == addr2) { AbsRegion &out1 = assign1->out(); AbsRegion &out2 = assign2->out(); return out1 < out2; } else { return addr1 < addr2; } }
// Apply the transfer function of the instruction behind curNode to newFact.
//
// newFact holds the facts at the node's entry when this is called and is
// updated in place to the facts at the node's exit. Nodes without an
// assignment leave newFact untouched.
void BoundFactsCalculator::CalcTransferFunction(Node::Ptr curNode, BoundFact *newFact) {
    SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
    if (!node->assign()) {
        return;
    }

    AbsRegion &ar = node->assign()->out();

    // An assignment to the zero flag only records the predicate.
    if (ar.absloc().type() == Absloc::Register) {
        const bool writesZf = (ar.absloc().reg() == x86::zf) ||
                              (ar.absloc().reg() == x86_64::zf);
        if (writesZf) {
            // zf should be only predecessor of this node
            parsing_printf("\t\tThe predecessor node is zf assignment!\n");
            newFact->SetPredicate(node->assign(), ExpandAssignment(node->assign()) );
            return;
        }
    }

    Instruction::Ptr insn = node->assign()->insn();
    entryID id = insn->getOperation().getID();

    // The predecessor is not a conditional jump, so the bound fact is
    // determined from the source assignment.
    parsing_printf("\t\tThe predecessor node is normal node\n");
    parsing_printf("\t\t\tentry id %d\n", id);

    pair<AST::Ptr, bool> expandRet = ExpandAssignment(node->assign());
    if (!expandRet.first) {
        parsing_printf("\t\t\t No semantic support for this instruction. Assume it does not affect jump target calculation. Ignore it (Treat as identity function) except for ptest. ptest should kill the current predicate\n");
        if (id == e_ptest) {
            parsing_printf("\t\t\t\tptest instruction, kill predciate.\n");
            newFact->pred.valid = false;
        }
        return;
    }
    parsing_printf("\tAST: %s\n", expandRet.first->format().c_str());

    AST::Ptr calculation = expandRet.first;
    BoundCalcVisitor bcv(*newFact, node->block(), handleOneByteRead);
    calculation->accept(&bcv);

    // If the instruction writes memory, we need the AST that represents
    // the memory access and the address. When the AbsRegion represents
    // memory, its generator is set to the AST of the memory address during
    // symbolic expansion. When it represents a register, the generator is
    // not set.
    AST::Ptr outAST;
    if (ar.generator() != NULL) {
        outAST = SimplifyAnAST(
            RoseAST::create(ROSEOperation(ROSEOperation::derefOp, ar.size()),
                            ar.generator()),
            insn->size());
    } else {
        outAST = VariableAST::create(Variable(ar));
    }

    /*
     * Naively, bsf and bsr produce a bound from 0 to the number of bits of
     * the source operand. In practice, especially in libc, the real bound
     * is usually smaller than the size of the source operand.
     * Ex 1: shl %cl,%edx
     *       bsf %rdx,%rcx
     * Here rcx is in range [0,31] rather than [0,63] even though rdx has
     * 64 bits.
     *
     * Ex 2: pmovmskb %xmm0,%edx
     *       bsf %rdx, %rdx
     * Here rdx is in range [0,15] because pmovmskb only sets the least
     * significant 16 bits.
     * In addition, overapproximation of the bound can lead to bogus control
     * flow that causes overlapping blocks or functions. It is important to
     * further analyze the operand in bsf rather than directly conclude the
     * bound.
    if (id == e_bsf || id == e_bsr) {
        int size = node->assign()->insn()->getOperand(0).getValue()->size();
        newFact->GenFact(outAST, new BoundValue(StridedInterval(1,0, size * 8 - 1)), false);
        parsing_printf("\t\t\tCalculating transfer function: Output facts\n");
        newFact->Print();
        return;
    }
    */

    // Instructions with a dedicated transfer function. When one of them
    // applies, the generic handling below is skipped.
    bool handled = false;
    if (id == e_xchg) {
        newFact->SwapFact(calculation, outAST);
        handled = true;
    } else if (id == e_push) {
        // Only pushes of constants are tracked specially.
        if (calculation->getID() == AST::V_ConstantAST) {
            ConstantAST::Ptr c = boost::static_pointer_cast<ConstantAST>(calculation);
            newFact->PushAConst(c->val().val);
            handled = true;
        }
    } else if (id == e_pop) {
        if (newFact->PopAConst(outAST)) {
            handled = true;
        }
    }

    // Assume all SETxx entry ids are contiguous
    if (!handled && id >= e_setb && id <= e_setz) {
        newFact->GenFact(outAST, new BoundValue(StridedInterval(1,0,1)), false);
        handled = true;
    }

    if (!handled) {
        if (bcv.IsResultBounded(calculation)) {
            parsing_printf("\t\t\tGenerate bound fact for %s\n", outAST->format().c_str());
            newFact->GenFact(outAST, new BoundValue(*bcv.GetResultBound(calculation)), false);
        } else {
            parsing_printf("\t\t\tKill bound fact for %s\n", outAST->format().c_str());
            newFact->KillFact(outAST, false);
        }

        if (calculation->getID() == AST::V_VariableAST) {
            // We only track aliasing between registers
            parsing_printf("\t\t\t%s and %s are equal\n", calculation->format().c_str(), outAST->format().c_str());
            newFact->InsertRelation(calculation, outAST, BoundFact::Equal);
        }
        newFact->AdjustPredicate(outAST, calculation);

        // Now try to track all aliasing.
        // Currently, every variable in the slice is represented as an AST
        // built from the input variables of the slice (the variables whose
        // value sources we do not know).
        newFact->TrackAlias(DeepCopyAnAST(calculation), outAST);

        // Apply the tracked relations to the calculation to generate a
        // potentially stricter bound.
        BoundValue *strictValue = newFact->ApplyRelations(outAST);
        if (strictValue != NULL) {
            parsing_printf("\t\t\tGenerate stricter bound fact for %s\n", outAST->format().c_str());
            newFact->GenFact(outAST, strictValue, false);
        }
    }

    parsing_printf("\t\t\tCalculating transfer function: Output facts\n");
    newFact->Print();
}
// Compute the incoming fact of curNode by meeting the outgoing facts of all
// of its predecessors in the slice.
//
// For every incoming edge whose source already has an outgoing fact, a
// private copy of that fact is made (the predecessor's fact is never
// modified), edge-specific adjustments are applied to the copy, and the
// copy is merged into the result.
//
// Returns a heap-allocated BoundFact owned by the caller, or NULL when no
// predecessor has been calculated yet.
BoundFact* BoundFactsCalculator::Meet(Node::Ptr curNode) {
    SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);

    EdgeIterator gbegin, gend;
    curNode->ins(gbegin, gend);

    BoundFact* newFact = NULL;
    bool first = true;
    for (; gbegin != gend; ++gbegin) {
        TypedSliceEdge::Ptr edge = boost::static_pointer_cast<TypedSliceEdge>(*gbegin);
        SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(edge->source());

        BoundFact *srcFact = GetBoundFactOut(srcNode);
        if (srcFact == NULL) {
            parsing_printf("\t\tIncoming node %lx has not been calculated yet, ignore it\n", srcNode->addr());
            continue;
        }

        // Work on exactly one private copy per edge so that the bound fact
        // of the predecessor is never overwritten. Earlier code copied the
        // fact a second time in the branches below and leaked the first
        // copy.
        BoundFact *prevFact = new BoundFact(*srcFact);
        bool newCopy = true;

        parsing_printf("\t\tMeet incoming edge from %lx\n", srcNode->addr());
        parsing_printf("\t\tThe fact from %lx before applying transfer function\n", srcNode->addr());
        prevFact->Print();

        if (!srcNode->assign()) {
            parsing_printf("\t\tThe predecessor node is the virtual entry ndoe\n");
            if (firstBlock) {
                // If the indirect jump is in the entry block
                // of the function, we assume that rax is in
                // range [0,8] for analyzing the movaps table.
                // NEED TO HAVE A SAFE WAY TO DO THIS!!
                parsing_printf("\t\tApplying entry block rax assumption!\n");
                AST::Ptr axAST;
                if (func->entry()->obj()->cs()->getAddressWidth() == 8) {
                    axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86_64::rax))));
                } else {
                    // DOES THIS REALLY SHOW UP IN 32-BIT CODE???
                    axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86::eax))));
                }
                prevFact->GenFact(axAST, new BoundValue(StridedInterval(1,0,8)), false);
            }
        } else if (IsConditionalJump(srcNode->assign()->insn())) {
            // If the predecessor is a conditional jump,
            // we can determine bound fact based on the predicate and the edge type
            parsing_printf("\t\tThe predecessor node is a conditional jump!\n");
            if (!prevFact->ConditionalJumpBound(srcNode->assign()->insn(), edge->type())) {
                fprintf(stderr, "From %lx to %lx\n", srcNode->addr(), node->addr());
                assert(0);
            }
        }

        // prevFact and newCopy are passed as lvalues on purpose.
        // NOTE(review): ThunkBound presumably may replace the fact and
        // update the ownership flag; confirm against its declaration.
        ThunkBound(prevFact, srcNode, node, newCopy);

        parsing_printf("\t\tFact from %lx after applying transfer function\n", srcNode->addr());
        prevFact->Print();

        if (first) {
            // For the first incoming dataflow fact,
            // we just copy it.
            // We can do this because if an a-loc
            // is missing in the fact map, we assume
            // the a-loc is top.
            first = false;
            if (newCopy) {
                newFact = prevFact;   // take ownership of the private copy
            } else {
                newFact = new BoundFact(*prevFact);
            }
        } else {
            newFact->Meet(*prevFact);
            if (newCopy) {
                delete prevFact;
            }
        }
    }
    return newFact;
}
bool BoundFactsCalculator::CalculateBoundedFacts() { /* We use a dataflow analysis to calculate the value bound * of each register and potentially some memory locations. * The key steps of the dataflow analysis are * 1. Determine the analysis order: * First calculate all strongly connected components (SCC) * of the graph. The flow analysis inside a SCC is * iterative. The flow analysis between several SCCs * is done topologically. * 2. For each node, need to calculate the meet and * calculate the transfer function. * 1. The meet should be simply an intersection of all the bounded facts * along all paths. * 2. To calculate the transfer function, we first get the symbolic expression * of the instrution for the node. Then depending on the instruction operation * and the meet result, we know what are still bounded. For example, loading * memory is always unbounded; doing and operation on a register with a constant * makes the register bounded. */ DetermineAnalysisOrder(); queue<Node::Ptr> workingList; unordered_set<Node::Ptr, Node::NodePtrHasher> inQueue; unordered_map<Node::Ptr, int, Node::NodePtrHasher> inQueueLimit; for (int curOrder = 0; curOrder <= orderStamp; ++curOrder) { // We first determine which nodes are // in this SCC vector<Node::Ptr> curNodes; NodeIterator nbegin, nend; slice->allNodes(nbegin, nend); for (; nbegin != nend; ++nbegin) { if (analysisOrder[*nbegin] == curOrder) { curNodes.push_back(*nbegin); workingList.push(*nbegin); inQueue.insert(*nbegin); } } if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) { // If this SCC is an entry SCC, // we choose a node inside the SCC // and let it be top. 
// This should only contain the virtual entry node parsing_printf("This SCC does not incoming edges from outside\n"); boundFactsIn[curNodes[0]] = new BoundFact(); boundFactsOut[curNodes[0]] = new BoundFact(); } parsing_printf("Starting analysis inside SCC %d\n", curOrder); // We now start iterative analysis inside the SCC while (!workingList.empty()) { // We get the current node Node::Ptr curNode = workingList.front(); workingList.pop(); inQueue.erase(curNode); SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode); ++inQueueLimit[curNode]; if (inQueueLimit[curNode] > IN_QUEUE_LIMIT) continue; BoundFact* oldFactIn = GetBoundFactIn(curNode); parsing_printf("Calculate Meet for %lx", node->addr()); if (node->assign()) { parsing_printf(", insn: %s\n", node->assign()->insn()->format().c_str()); } else { if (node->block() == NULL) parsing_printf(", the VirtualExit node\n"); else parsing_printf(", the VirtualEntry node\n"); } parsing_printf("\tOld fact for %lx:\n", node->addr()); if (oldFactIn == NULL) parsing_printf("\t\t do not exist\n"); else oldFactIn->Print(); // We find all predecessors of the current node // and calculates the union of the analysis results // from the predecessors BoundFact* newFactIn = Meet(curNode); parsing_printf("\tNew fact at %lx\n", node->addr()); if (newFactIn != NULL) newFactIn->Print(); else parsing_printf("\t\tNot calculated\n"); // If the current node has not been calcualted yet, // or the new meet results are different from the // old ones, we keep the new results if (newFactIn != NULL && (oldFactIn == NULL || *oldFactIn != *newFactIn)) { parsing_printf("\tFacts change!\n"); if (oldFactIn != NULL) delete oldFactIn; boundFactsIn[curNode] = newFactIn; BoundFact* newFactOut = new BoundFact(*newFactIn); // The current node has a transfer function // that changes the analysis results CalcTransferFunction(curNode, newFactOut); if (boundFactsOut.find(curNode) != boundFactsOut.end() && boundFactsOut[curNode] != NULL) delete 
boundFactsOut[curNode]; boundFactsOut[curNode] = newFactOut; curNode->outs(nbegin, nend); for (; nbegin != nend; ++nbegin) // We only add node inside current SCC into the working list if (inQueue.find(*nbegin) == inQueue.end() && analysisOrder[*nbegin] == curOrder) { workingList.push(*nbegin); inQueue.insert(*nbegin); } } else { if (newFactIn != NULL) delete newFactIn; parsing_printf("\tFacts do not change!\n"); } } } return true; }
// Expand the assignment of one slice node and substitute the already
// expanded definitions of its predecessors into the resulting AST.
//
// Parameters:
//   ptr       - the slice node to process
//   dbase     - assignment -> AST map; read for the predecessors' ASTs and
//               updated with the AST computed for ptr's assignment
//   skipEdges - incoming edges to ignore (used to break cycles)
//
// Returns:
//   WIDEN_NODE         - the node has no assignment; dbase is not touched
//   FAILED_TRANSLATION - SymEval::expand reported a failed translation
//   SKIPPED_INPUT      - an incoming edge was skipped, or an input had
//                        conflicting definitions
//   SUCCESS            - at least one substitution was performed
//   FAILED             - none of the above
SymEval::Retval_t SymEval::process(SliceNode::Ptr ptr, Result_t &dbase, std::set<Edge::Ptr> &skipEdges) {
    bool failedTranslation = false;
    bool skippedEdge = false;
    bool skippedInput = false;
    bool success = false;

    std::map<const AbsRegion*, std::set<Assignment::Ptr> > inputMap;

    expand_cerr << "Calling process on " << ptr->format() << endl;

    // Don't try an expansion of a widen node...
    if (!ptr->assign()) {
        return WIDEN_NODE;
    }

    // Collect, for every used region, the assignments that define it.
    EdgeIterator begin, end;
    ptr->ins(begin, end);
    for (; begin != end; ++begin) {
        SliceEdge::Ptr edge = boost::static_pointer_cast<SliceEdge>(*begin);
        SliceNode::Ptr source = boost::static_pointer_cast<SliceNode>(edge->source());

        // Skip this one to break a cycle.
        if (skipEdges.find(edge) != skipEdges.end()) {
            expand_cerr << "In process, skipping edge from " << source->format() << endl;
            skippedEdge = true;
            continue;
        }

        Assignment::Ptr assign = source->assign();
        if (!assign) {
            continue; // widen node
        }

        expand_cerr << "Assigning input " << edge->data().format() << " from assignment " << assign->format() << endl;
        inputMap[&edge->data()].insert(assign);
    }

    expand_cerr << "\t Input map has size " << inputMap.size() << endl;

    // All of the expanded inputs are in the parameter dbase
    // If not (like this one), add it
    AST::Ptr ast;
    boost::tie(ast, failedTranslation) = SymEval::expand(ptr->assign());
    // expand_cerr << "\t ... resulting in " << dbase.format() << endl;

    // We have an AST. Now substitute in all of its predecessors.
    for (std::map<const AbsRegion*, std::set<Assignment::Ptr> >::iterator iter = inputMap.begin();
         iter != inputMap.end(); ++iter) {
        // If we have multiple secondary definitions, we:
        //   if all definitions are equal, use the first
        //   otherwise, use nothing
        AST::Ptr definition;
        for (std::set<Assignment::Ptr>::iterator iter2 = iter->second.begin();
             iter2 != iter->second.end(); ++iter2) {
            AST::Ptr newDef = dbase[*iter2];
            if (!definition) {
                definition = newDef;
                continue;
            } else if (definition->equals(newDef)) {
                continue;
            } else {
                // Not equal
                definition = AST::Ptr();
                skippedInput = true;
                break;
            }
        }

        if (!definition) {
            // Can happen if we're expanding out of order, and is generally harmless.
            continue;
        }

        // The region used by the current assignment...
        const AbsRegion &reg = *iter->first;

        // Create an AST around this one
        VariableAST::Ptr use = VariableAST::create(Variable(reg, ptr->addr()));

        expand_cerr << "Before substitution: " << (ast ? ast->format() : "<NULL AST>") << endl;

        if (!ast) {
            expand_cerr << "Skipping substitution because of null AST" << endl;
        } else {
            ast = AST::substitute(ast, use, definition);
            success = true;
        }
        expand_cerr << "\t result is " << (ast ? ast->format() : "<NULL AST>") << endl;
    }
    expand_cerr << "Result of substitution: " << ptr->assign()->format() << " == "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    // And attempt simplification again
    ast = simplifyStack(ast, ptr->addr(), ptr->func(), ptr->block());
    expand_cerr << "Result of post-substitution simplification: " << ptr->assign()->format() << " == "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    dbase[ptr->assign()] = ast;

    if (failedTranslation) {
        return FAILED_TRANSLATION;
    } else if (skippedEdge || skippedInput) {
        return SKIPPED_INPUT;
    } else if (success) {
        return SUCCESS;
    } else {
        return FAILED;
    }
}
// Do the previous, but use a Graph as a guide for // performing forward substitution on the AST results SymEval::Retval_t SymEval::expand(Dyninst::Graph::Ptr slice, DataflowAPI::Result_t &res) { bool failedTranslation = false; bool skippedInput = false; //cout << "Calling expand" << endl; // Other than the substitution this is pretty similar to the first example. NodeIterator gbegin, gend; slice->allNodes(gbegin, gend); // First, we'll sort the nodes in some deterministic order so that the loop removal // is deterministic std::vector<SliceNode::Ptr> sortVector; for ( ; gbegin != gend; ++gbegin) { Node::Ptr ptr = *gbegin; expand_cerr << "pushing " << (*gbegin)->format() << " to sortVector" << endl; SliceNode::Ptr cur = boost::static_pointer_cast<SliceNode>(ptr); sortVector.push_back(cur); } std::stable_sort(sortVector.begin(), sortVector.end(), vectorSort); // Optimal ordering of search ExpandOrder worklist; std::queue<Node::Ptr> dfs_worklist; std::vector<SliceNode::Ptr>::iterator vit = sortVector.begin(); for ( ; vit != sortVector.end(); ++vit) { SliceNode::Ptr ptr = *vit; Node::Ptr cur = boost::static_pointer_cast<Node>(ptr); dfs_worklist.push(cur); } /* First, we'll do DFS to check for circularities in the graph; * if so, mark them so we don't do infinite substitution */ std::map<Node::Ptr, int> state; while (!dfs_worklist.empty()) { Node::Ptr ptr = dfs_worklist.front(); dfs_worklist.pop(); dfs(ptr, state, worklist.skipEdges()); } slice->allNodes(gbegin, gend); for (; gbegin != gend; ++gbegin) { expand_cerr << "adding " << (*gbegin)->format() << " to worklist" << endl; Node::Ptr ptr = *gbegin; SliceNode::Ptr sptr = boost::static_pointer_cast<SliceNode>(ptr); worklist.insert(sptr,false); } /* have a list * for each node, process * if processessing succeeded, remove the element * if the size of the list has changed, continue */ while (1) { SliceNode::Ptr aNode; int order; boost::tie(aNode,order) = worklist.pop_next(); if (order == -1) // empty break; if 
(!aNode->assign()) { worklist.mark_done(aNode); continue; // Could be a widen point } expand_cerr << "Visiting node " << aNode->assign()->format() << " order " << order << endl; if (order != 0) { cerr << "ERROR: order is non zero: " << order << endl; } assert(order == 0); // there are no loops AST::Ptr prev = res[aNode->assign()]; Retval_t result = process(aNode, res, worklist.skipEdges()); AST::Ptr post = res[aNode->assign()]; switch (result) { case FAILED: return FAILED; break; case WIDEN_NODE: // Okay... break; case FAILED_TRANSLATION: failedTranslation = true; break; case SKIPPED_INPUT: skippedInput = true; break; case SUCCESS: break; } // We've visited this node, freeing its children // to be visited in turn worklist.mark_done(aNode); if (post && !(post->equals(prev))) { expand_cerr << "Adding successors to list, as new expansion " << endl << "\t" << post->format() << endl << " != " << endl << "\t" << (prev ? prev->format() : "<NULL>") << endl; EdgeIterator oB, oE; aNode->outs(oB, oE); for (; oB != oE; ++oB) { if(worklist.skipEdges().find(*oB) == worklist.skipEdges().end()) { SliceNode::Ptr out = boost::static_pointer_cast<SliceNode>( (*oB)->target()); worklist.insert(out); } } } } if (failedTranslation) return FAILED_TRANSLATION; else if (skippedInput) return SKIPPED_INPUT; else return SUCCESS; }