/**
 * Expands every assignment in `res` that does not yet have an AST.
 *
 * @param res            Assignment -> AST table. Entries whose AST is null are
 *                       expanded through expandInsn(), which is handed the
 *                       whole table.
 * @param failedInsns    Cleared on entry; receives the instruction of each
 *                       assignment for which expandInsn() reported failure.
 * @param applyVisitors  When true, every non-null AST in `res` is passed
 *                       through simplifyStack() and then a BooleanVisitor.
 * @return true when no instruction was recorded in `failedInsns`.
 */
bool SymEval::expand(Result_t &res,
                     std::set<InstructionPtr> &failedInsns,
                     bool applyVisitors) {
  failedInsns.clear();

  // Expansion is driven by instruction, so that is what gets handed on
  // (ultimately to ROSE).
  for (Result_t::iterator cur = res.begin(); cur != res.end(); ++cur) {
    const bool alreadyFilled = (cur->second != AST::Ptr());
    if (alreadyFilled) {
      // An earlier instruction crack already produced this entry.
      continue;
    }

    Assignment::Ptr assign = cur->first;
    if (!expandInsn(assign->insn(), assign->addr(), res)) {
      failedInsns.insert(assign->insn());
    }
  }

  if (!applyVisitors) {
    return failedInsns.empty();
  }

  // Post-process each entry that actually holds an AST.
  for (Result_t::iterator cur = res.begin(); cur != res.end(); ++cur) {
    if (!cur->second) {
      continue;
    }

    AST::Ptr simplified = simplifyStack(cur->second,
                                        cur->first->addr(),
                                        cur->first->func(),
                                        cur->first->block());
    BooleanVisitor visitor;
    cur->second = simplified->accept(&visitor);
  }

  return failedInsns.empty();
}
/**
 * Constructs the policy for the instruction at address `a` and populates
 * aaMap with the assignments in `r` that belong to that address.
 *
 * @param r     Result table; only its keys (assignments) are read here.
 * @param a     Address of the instruction being evaluated.
 * @param ac    Architecture used to build the PC abstract location.
 * @param insn  The instruction being evaluated.
 */
SymEvalPolicy::SymEvalPolicy(Result_t &r,
                             Address a,
                             Dyninst::Architecture ac,
                             Instruction::Ptr insn) :
  res(r),
  arch(ac),
  addr(a),
  ip_(Handle<32>(wrap(Absloc::makePC(arch)))),
  failedTranslate_(false),
  insn_(insn) {

  // Build aaMap: abstract location -> assignment defining it at this address.
  for (Result_t::iterator entry = r.begin(); entry != r.end(); ++entry) {
    Assignment::Ptr assign = entry->first;

    if (assign->addr() == addr) {
      AbsRegion &outRegion = assign->out();

      if (!outRegion.containsOfType(Absloc::Register)) {
        // Memory definition: filed under the sufficiently-unique
        // (Heap,0) Absloc.
        aaMap[Absloc(0)] = assign;
      } else {
        // Register definition; assumed to be a single register.
        aaMap[outRegion.absloc()] = assign;
      }
    }
    // Assignments for other instructions are ignored.
  }
}
/**
 * Emits the two assignments of a push-like instruction:
 *   1) stack slot <- operands, SP
 *   2) SP         <- SP
 *
 * @param I            Instruction being converted.
 * @param addr         Address of the instruction.
 * @param func         Containing function; also supplies the architecture.
 * @param block        Containing block.
 * @param operands     Regions whose values are written to the stack slot.
 * @param assignments  Output; the two new assignments are appended in the
 *                     order listed above.
 */
void AssignmentConverter::handlePushEquivalent(const Instruction::Ptr I,
                                               Address addr,
                                               ParseAPI::Function *func,
                                               ParseAPI::Block *block,
                                               std::vector<AbsRegion> &operands,
                                               std::vector<Assignment::Ptr> &assignments) {
  AbsRegion stackSlot = aConverter.stack(addr, func, block, true);
  AbsRegion stackPtr(Absloc::makeSP(func->isrc()->getArch()));

  // The handed-in operands (plus SP) define *SP ...
  Assignment::Ptr slotDef(new Assignment(I, addr, func, block, stackSlot));
  slotDef->addInputs(operands);
  slotDef->addInput(stackPtr);

  // ... and SP is then updated from itself.
  Assignment::Ptr spDef(new Assignment(I, addr, func, block, stackPtr));
  spDef->addInput(stackPtr);

  assignments.push_back(slotDef);
  assignments.push_back(spDef);
}
/**
 * Returns the simplified AST for `assign`, memoised in expandCache.
 *
 * On a cache hit the stored AST is returned, and the flag is true exactly when
 * that AST is non-null. On a miss the assignment is expanded with
 * SymEval::expand(assign, false). If the expansion's flag is set and it
 * produced an AST, that AST is simplified with SimplifyAnAST() and cached;
 * otherwise a null AST is cached. The flag returned on a miss is the one
 * reported by SymEval::expand.
 */
pair<AST::Ptr, bool> BoundFactsCalculator::ExpandAssignment(Assignment::Ptr assign) {
    parsing_printf("Expand assignment : %s Instruction: %s\n",
                   assign->format().c_str(),
                   assign->insn()->format().c_str());

    auto cached = expandCache.find(assign);
    if (cached != expandCache.end()) {
        AST::Ptr ast = cached->second;
        return make_pair(ast, ast ? true : false);
    }

    pair<AST::Ptr, bool> expandRet = SymEval::expand(assign, false);

    // Stays null unless the expansion yields something usable.
    AST::Ptr toCache;
    if (expandRet.second && expandRet.first) {
        parsing_printf("Original expand: %s\n", expandRet.first->format().c_str());
        toCache = SimplifyAnAST(expandRet.first, assign->insn()->size());
    }

    expandCache[assign] = toCache;
    return make_pair(toCache, expandRet.second);
}
/**
 * Reports whether the instruction behind `memLoc` is a zero-extending move,
 * judged purely by the substring "movz" appearing in its formatted text.
 *
 * @param memLoc  Assignment to inspect; a null pointer yields false.
 * @return true if the formatted instruction contains "movz".
 */
bool IndirectControlFlowAnalyzer::IsZeroExtend(Assignment::Ptr memLoc) {
    if (!memLoc) {
        parsing_printf("\tmemLoc is null\n");
        return false;
    }

    Instruction i = memLoc->insn();
    const string text = i.format();
    parsing_printf("check zero extend %s\n", text.c_str());

    return text.find("movz") != string::npos;
}
/**
 * Emits the two assignments of a pop-like instruction:
 *   1) operands[0] <- stack slot, SP, operands[1..]
 *   2) SP          <- SP
 *
 * @param I            Instruction being converted.
 * @param addr         Address of the instruction.
 * @param func         Containing function; also supplies the architecture.
 * @param block        Containing block.
 * @param operands     operands[0] is the destination; any further entries are
 *                     added as inputs to it. Must not be empty.
 * @param assignments  Output; the two new assignments are appended in the
 *                     order listed above.
 */
void AssignmentConverter::handlePopEquivalent(const Instruction::Ptr I,
                                              Address addr,
                                              ParseAPI::Function *func,
                                              ParseAPI::Block *block,
                                              std::vector<AbsRegion> &operands,
                                              std::vector<Assignment::Ptr> &assignments) {
  AbsRegion stackSlot = aConverter.stack(addr, func, block, false);
  AbsRegion stackPtr(Absloc::makeSP(func->isrc()->getArch()));

  // Destination is defined from the top of the stack, SP, and every operand
  // beyond the first (e.g. the registers of a memory destination).
  Assignment::Ptr destDef(new Assignment(I, addr, func, block, operands[0]));
  destDef->addInput(stackSlot);
  destDef->addInput(stackPtr);
  for (std::vector<AbsRegion>::size_type idx = 1; idx < operands.size(); ++idx) {
    destDef->addInput(operands[idx]);
  }

  // Stack pointer update.
  Assignment::Ptr spDef(new Assignment(I, addr, func, block, stackPtr));
  spDef->addInput(stackPtr);

  assignments.push_back(destDef);
  assignments.push_back(spDef);
}
/**
 * Returns the size of the first memory-reading operand of the instruction
 * behind `memLoc`.
 *
 * @param memLoc  Assignment whose instruction is inspected; a null pointer
 *                yields 0.
 * @return The size() of the value expression of the first operand for which
 *         readsMemory() is true, or 0 if no operand reads memory.
 */
int IndirectControlFlowAnalyzer::GetMemoryReadSize(Assignment::Ptr memLoc) {
    if (!memLoc) {
        parsing_printf("\tmemLoc is null\n");
        return 0;
    }

    Instruction i = memLoc->insn();
    std::vector<Operand> ops;
    i.getOperands(ops);

    // ops.size() is a size_t; passing it straight to a "%d" conversion is a
    // varargs type mismatch, so narrow it explicitly to match the format.
    parsing_printf("\t there are %d operands\n", static_cast<int>(ops.size()));

    // Bind by reference: there is no need to copy each Operand to query it.
    for (const Operand &op : ops) {
        if (op.readsMemory()) {
            Expression::Ptr exp = op.getValue();
            return exp->size();
        }
    }
    return 0;
}
/**
 * Decomposes one instruction into its abstract assignments.
 *
 * `assignments` is cleared first. If cache() reports a hit for (func, addr)
 * the function returns immediately with whatever cache() produced. Otherwise
 * the instruction is dispatched on its opcode: push, call, pop, leave, ret,
 * xchg and stwu each get a hand-written decomposition, and every other opcode
 * falls back to "each defined region depends on every used region". When
 * cacheEnabled_ is set, the result is stored in cache_[func][addr].
 *
 * @param I            Instruction to convert.
 * @param addr         Address of the instruction.
 * @param func         Containing function.
 * @param block        Containing block.
 * @param assignments  Output vector (overwritten).
 */
void AssignmentConverter::convert(const Instruction::Ptr I,
                                  const Address &addr,
                                  ParseAPI::Function *func,
                                  ParseAPI::Block *block,
                                  std::vector<Assignment::Ptr> &assignments) {
  assignments.clear();
  if (cache(func, addr, assignments)) {
    return;
  }

  // There is no Definition concept yet, so the decomposition is done by hand:
  //   1) opcode-specific handling of multi-definition instructions;
  //   2) generic handling for everything else.

  switch (I->getOperation().getID()) {
  case e_push: {
    // *SP = <argument>; SP is updated.
    std::vector<Operand> operands;
    I->getOperands(operands);

    // Per the InstructionAPI: operand 0 is the pushed argument (register,
    // immediate or memory location), operand 1 is the stack pointer.
    assert(operands.size() == 2);

    std::vector<AbsRegion> pushed;
    aConverter.convertAll(operands[0].getValue(), addr, func, block, pushed);

    handlePushEquivalent(I, addr, func, block, pushed, assignments);
    break;
  }

  case e_call: {
    // A call is modelled as a push of the PC ...
    std::vector<AbsRegion> pcRegion;
    pcRegion.push_back(Absloc::makePC(func->isrc()->getArch()));
    Absloc sp = Absloc::makeSP(func->isrc()->getArch());

    handlePushEquivalent(I, addr, func, block, pcRegion, assignments);

    // ... followed by a definition of the PC, which depends on every used
    // region other than the PC and SP themselves.
    std::vector<AbsRegion> used;
    std::vector<AbsRegion> defined;
    aConverter.convertAll(I, addr, func, block, used, defined);

    Assignment::Ptr pcDef(new Assignment(I, addr, func, block, pcRegion[0]));
    if (used.empty()) {
      pcDef->addInputs(pcRegion);
    } else {
      for (const AbsRegion &region : used) {
        if (region.contains(pcRegion[0]) || region.contains(sp)) {
          continue;
        }
        pcDef->addInput(region);
      }
    }
    assignments.push_back(pcDef);
    break;
  }

  case e_pop: {
    // <reg> = *SP; SP is updated.
    // Both assignments read only incoming values, so there is no
    // intra-instruction ("sideways") dependence. It is still special-cased
    // so that SP does not depend on the incoming stack value.
    std::vector<Operand> operands;
    I->getOperands(operands);

    // Per the InstructionAPI: operand 0 is the explicit destination,
    // operand 1 is the stack pointer.
    assert(operands.size() == 2);

    std::vector<AbsRegion> popped;
    aConverter.convertAll(operands[0].getValue(), addr, func, block, popped);

    handlePopEquivalent(I, addr, func, block, popped, assignments);
    break;
  }

  case e_leave: {
    // leave == mov ebp, esp ; pop ebp
    // As definitions:  SP <- FP,  FP <- frame slot.
    // TODO FIXME: stack analysis does not really understand this yet.
    AbsRegion stackPtr(Absloc::makeSP(func->isrc()->getArch()));
    AbsRegion framePtr(Absloc::makeFP(func->isrc()->getArch()));

    // SP is assigned using FP.
    Assignment::Ptr spDef(new Assignment(I, addr, func, block, stackPtr));
    spDef->addInput(framePtr);

    // FP is reloaded from the frame region.
    Assignment::Ptr fpDef(new Assignment(I, addr, func, block, framePtr));
    fpDef->addInput(aConverter.frame(addr, func, block, false));

    assignments.push_back(spDef);
    assignments.push_back(fpDef);
    break;
  }

  case e_ret_near:
  case e_ret_far: {
    // PC = *SP; SP is updated. Like pop, but entirely implicit.
    AbsRegion pcReg = AbsRegion(Absloc::makePC(func->isrc()->getArch()));
    Assignment::Ptr pcDef(new Assignment(I, addr, func, block, pcReg));
    pcDef->addInput(aConverter.stack(addr, func, block, false));

    AbsRegion stackPtr = AbsRegion(Absloc::makeSP(func->isrc()->getArch()));
    Assignment::Ptr spDef(new Assignment(I, addr, func, block, stackPtr));
    spDef->addInput(stackPtr);

    assignments.push_back(pcDef);
    assignments.push_back(spDef);
    break;
  }

  case e_xchg: {
    // xchg defines both of its operands, each from the other.
    std::vector<Operand> operands;
    I->getOperands(operands);
    assert(operands.size() == 2);

    std::vector<AbsRegion> lhs;
    aConverter.convertAll(operands[0].getValue(), addr, func, block, lhs);
    std::vector<AbsRegion> rhs;
    aConverter.convertAll(operands[1].getValue(), addr, func, block, rhs);

    // A memory operand converts to several regions (the memory location
    // first, then its address registers). So the leading region of each side
    // is defined from everything on the other side.
    Assignment::Ptr lhsDef(new Assignment(I, addr, func, block, lhs[0]));
    lhsDef->addInputs(rhs);
    Assignment::Ptr rhsDef(new Assignment(I, addr, func, block, rhs[0]));
    rhsDef->addInputs(lhs);

    assignments.push_back(lhsDef);
    assignments.push_back(rhsDef);
    break;
  }

  case power_op_stwu: {
    std::vector<Operand> operands;
    I->getOperands(operands);

    // stwu <a>, <b>, <c>   e.g. <a> = R1, <b> = -16(R1), <c> = R1
    // giving R1 <= R1 - 16 and -16(R1) <= R1.
    // NOTE(review): the a/b/c description above and the RS/RA naming below
    // appear to give operands 0 and 2 opposite roles; the example uses R1
    // for both, so it cannot settle it. Confirm the operand order.
    std::set<Expression::Ptr> writes;
    I->getMemoryWriteOperands(writes);
    assert(writes.size() == 1);

    Expression::Ptr written = *(writes.begin());
    AbsRegion effAddr = aConverter.convert(written, addr, func, block);

    std::vector<AbsRegion> rsRegions;
    aConverter.convertAll(operands[0].getValue(), addr, func, block, rsRegions);
    AbsRegion RS = rsRegions[0];

    std::vector<AbsRegion> raRegions;
    aConverter.convertAll(operands[2].getValue(), addr, func, block, raRegions);
    AbsRegion RA = raRegions[0];

    Assignment::Ptr memDef(new Assignment(I, addr, func, block, effAddr));
    memDef->addInput(RS);

    Assignment::Ptr raDef(new Assignment(I, addr, func, block, RA));
    raDef->addInput(RS);

    assignments.push_back(memDef);
    assignments.push_back(raDef);
    break;
  }

  default: {
    // Generic case: every defined region depends on every used region.
    std::vector<AbsRegion> used;
    std::vector<AbsRegion> defined;
    aConverter.convertAll(I, addr, func, block, used, defined);

    for (const AbsRegion &target : defined) {
      Assignment::Ptr def(new Assignment(I, addr, func, block, target));
      def->addInputs(used);
      assignments.push_back(def);
    }
    break;
  }
  }

  // Flags: on IA-32/AMD-64 they are modelled as depending on the inputs of
  // the instruction rather than on its outputs, so they introduce no
  // intra-instruction dependence.
  //
  // PC: most instructions (calls, relative branches, ...) use the PC to set
  // the PC; the interesting cases are indirect or absolute branches, and
  // conditional branches together with the flags they read.

  if (cacheEnabled_) {
    cache_[func][addr] = assignments;
  }
}
/**
 * Expands the assignment of one slice node and substitutes into it the
 * already-expanded definitions of its predecessors.
 *
 * @param ptr        Slice node to process. A node without an assignment is
 *                   treated as a widen node.
 * @param dbase      Assignment -> AST table. Read for predecessor
 *                   definitions (a missing predecessor gets a null entry via
 *                   operator[]) and updated with the result for `ptr`.
 * @param skipEdges  Incoming edges to ignore (used to break cycles).
 * @return WIDEN_NODE if `ptr` has no assignment; otherwise, in priority
 *         order: FAILED_TRANSLATION if the flag reported by SymEval::expand
 *         is set, SKIPPED_INPUT if an edge was skipped or an input had
 *         conflicting definitions, SUCCESS if at least one substitution was
 *         performed, FAILED otherwise.
 */
SymEval::Retval_t SymEval::process(SliceNode::Ptr ptr,
                                   Result_t &dbase,
                                   std::set<Edge::Ptr> &skipEdges) {
  typedef std::map<const AbsRegion*, std::set<Assignment::Ptr> > InputMap;

  bool failedTranslation;
  bool skippedEdge = false;
  bool skippedInput = false;
  bool success = false;

  InputMap inputMap;

  expand_cerr << "Calling process on " << ptr->format() << endl;

  // Don't try an expansion of a widen node...
  if (!ptr->assign()) return WIDEN_NODE;

  // Group the defining assignments of each incoming edge by the region the
  // edge carries.
  EdgeIterator begin, end;
  ptr->ins(begin, end);

  for (; begin != end; ++begin) {
    SliceEdge::Ptr edge = boost::static_pointer_cast<SliceEdge>(*begin);
    SliceNode::Ptr source = boost::static_pointer_cast<SliceNode>(edge->source());

    // Skip this one to break a cycle.
    if (skipEdges.find(edge) != skipEdges.end()) {
      expand_cerr << "In process, skipping edge from "
                  << source->format() << endl;
      skippedEdge = true;
      continue;
    }

    Assignment::Ptr assign = source->assign();
    if (!assign) continue; // widen node

    expand_cerr << "Assigning input " << edge->data().format()
                << " from assignment " << assign->format() << endl;
    inputMap[&edge->data()].insert(assign);
  }

  expand_cerr << "\t Input map has size " << inputMap.size() << endl;

  // All of the expanded inputs are in the parameter dbase; this node's own
  // assignment is expanded here.
  // NOTE(review): the bool returned by SymEval::expand is stored in
  // `failedTranslation`, yet ExpandAssignment appears to treat the same bool
  // as a success flag. The overload is not visible here - confirm polarity.
  AST::Ptr ast;
  boost::tie(ast, failedTranslation) = SymEval::expand(ptr->assign());

  // We have an AST. Now substitute in all of its predecessors.
  for (InputMap::iterator iter = inputMap.begin();
       iter != inputMap.end(); ++iter) {
    // If we have multiple secondary definitions, we:
    //   if all definitions are equal, use the first
    //   otherwise, use nothing
    AST::Ptr definition;

    for (std::set<Assignment::Ptr>::iterator iter2 = iter->second.begin();
         iter2 != iter->second.end(); ++iter2) {
      AST::Ptr newDef = dbase[*iter2];
      if (!definition) {
        definition = newDef;
        continue;
      }
      // A null newDef is treated as a mismatch rather than handed to
      // equals(), whose handling of a null argument is not visible here.
      if (newDef && definition->equals(newDef)) {
        continue;
      }
      // Not equal
      definition = AST::Ptr();
      skippedInput = true;
      break;
    }

    if (!definition) {
      // Can happen if we're expanding out of order, and is generally harmless.
      continue;
    }

    // The region used by the current assignment...
    const AbsRegion &reg = *iter->first;

    // ...wrapped in a variable AST that the definition will replace.
    VariableAST::Ptr use = VariableAST::create(Variable(reg, ptr->addr()));

    expand_cerr << "Before substitution: "
                << (ast ? ast->format() : "<NULL AST>") << endl;

    if (!ast) {
      expand_cerr << "Skipping substitution because of null AST" << endl;
    } else {
      ast = AST::substitute(ast, use, definition);
      success = true;
    }
    expand_cerr << "\t result is "
                << (ast ? ast->format() : "<NULL AST>") << endl;
  }

  expand_cerr << "Result of substitution: " << ptr->assign()->format()
              << " == " << (ast ? ast->format() : "<NULL AST>") << endl;

  // And attempt simplification again
  ast = simplifyStack(ast, ptr->addr(), ptr->func(), ptr->block());
  expand_cerr << "Result of post-substitution simplification: "
              << ptr->assign()->format() << " == "
              << (ast ? ast->format() : "<NULL AST>") << endl;

  dbase[ptr->assign()] = ast;

  if (failedTranslation) return FAILED_TRANSLATION;
  else if (skippedEdge || skippedInput) return SKIPPED_INPUT;
  else if (success) return SUCCESS;
  else return FAILED;
}