/************************************************************************** * Main function. This function is run on each node that is being traversed * in the graph. For each node, we determine the successors and check * if those have been previously seen. If yes, a cycle may exist. **************************************************************************/ bool CompassAnalyses::CycleDetection::Traversal::run(string& name, SgGraphNode* node, SgGraphNode* previous){ // check known function calls and resolve variables ROSE_ASSERT(node); //cerr << " cycledetection->run " << node->get_name() << endl; SgAsmFunction* func = isSgAsmFunction(node->get_SgNode()); if (func) { // if the node is a function, we clear the visited nodes // this should speed up our search visited.clear(); return false; } successors.clear(); ROSE_ASSERT(vizzGraph); vizzGraph->getSuccessors(node, successors); vector<SgGraphNode*>::iterator succ = successors.begin(); for (;succ!=successors.end();++succ) { // for each successor do... 
SgGraphNode* next = *succ; // if the node is an instruction, we check if it was visited // if not, we add it to the visited set, otherwise a cycle is present std::set<SgGraphNode*>::iterator it =visited.find(next); if (it!=visited.end()) { // found this node in visited list SgAsmx86Instruction* nodeSg = isSgAsmx86Instruction(node->get_SgNode()); SgAsmx86Instruction* nextSg = isSgAsmx86Instruction(next->get_SgNode()); if (debug) { std::string outputText = "Found possible cycle between "; outputText+=stringifyX86InstructionKind(nodeSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nodeSg->get_address()) + ") and "; outputText+=stringifyX86InstructionKind(nextSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nextSg->get_address()) + ")"; std::cerr << outputText << std::endl; output->addOutput(new CheckerOutput(nodeSg, outputText)); } bool validCycle = checkIfValidCycle(node,next); if (validCycle) { std::string outputText = "Found cycle between "; outputText+=stringifyX86InstructionKind(nodeSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nodeSg->get_address()) + ") and "; outputText+=stringifyX86InstructionKind(nextSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nextSg->get_address()) + ")"; std::cerr << outputText << std::endl; output->addOutput(new CheckerOutput(nodeSg, outputText)); cycleFound[node]=next; } else { if (debug) std::cerr << "This is not a cyclic node " << std::endl; } } } visited.insert(node); return false; }
void CompassAnalyses::BinaryInterruptAnalysis::Traversal::getValueForDefinition(std::vector<uint64_t>& vec, std::vector<uint64_t>& positions, uint64_t& fpos, SgGraphNode* node, std::pair<X86RegisterClass, int> reg ) { set <SgGraphNode*> defNodeSet = getDefFor(node, reg); if (RoseBin_support::DEBUG_MODE()) cout << " size of found NodeSet = " << defNodeSet.size() <<endl; set <SgGraphNode*>::const_iterator it = defNodeSet.begin(); for (;it!=defNodeSet.end();++it) { SgGraphNode* defNode = *it; if (RoseBin_support::DEBUG_MODE() && defNode) cout << " investigating ... " << defNode->get_name() <<endl; ROSE_ASSERT(defNode); SgAsmx86Instruction* inst = isSgAsmx86Instruction(defNode->get_SgNode()); ROSE_ASSERT(inst); positions.push_back(inst->get_address()); // the right hand side of the instruction is either a use or a value bool memRef = false, regRef = false; std::pair<X86RegisterClass, int> regRight = check_isRegister(defNode, inst, true, memRef, regRef); if (RoseBin_support::DEBUG_MODE()) { string regName = unparseX86Register(RegisterDescriptor(reg.first, reg.second, 0, 64), NULL); string regNameRight = unparseX86Register(RegisterDescriptor(regRight.first, regRight.second, 0, 64), NULL); cout << " VarAnalysis: getValueForDef . " << regName << " right hand : " << regNameRight <<endl; } if (!regRef) { // it is either a memref or a value if (!memRef) { // get value of right hand side instruction uint64_t val = getValueOfInstr(inst, true); vec.push_back(val); fpos = inst->get_address(); if (RoseBin_support::DEBUG_MODE()) cout << " found valueOfInst = " << RoseBin_support::ToString(val) <<endl; } } else { // it is a register reference. 
I.e we need to follow the usage edge to find the // definition of that node SgGraphNode* usageNode = g_algo->getDefinitionForUsage(vizzGraph,defNode); if (usageNode && usageNode!=node) { if (RoseBin_support::DEBUG_MODE() && usageNode) cout << " following up usage for " << usageNode->get_name() <<endl; getValueForDefinition(vec, positions, fpos, usageNode, regRight); } else { // we look at the same node. cout << " ERROR :: Either following usage to itself or usageNode = NULL. " << usageNode << endl; } } } }
// Builds the BTOR translation state for a project.
//
// hooks                   : stored in the `hooks` member.
// minNumStepsToFindError  : first step (>= 1) at which errors are enabled.
// maxNumStepsToFindError  : last step at which errors are enabled; 0 means
//                           no upper bound.  Must be below 0xFFFFFFFF because
//                           the step counter saturates at max + 1.
// proj                    : AST that is scanned for function starts, block
//                           starts, call return points and valid instruction
//                           addresses.
BtorTranslationPolicy::BtorTranslationPolicy(BtorTranslationHooks* hooks,
                                             uint32_t minNumStepsToFindError,
                                             uint32_t maxNumStepsToFindError,
                                             SgProject* proj):
  problem(), hooks(hooks), regdict(NULL) {
  assert (minNumStepsToFindError >= 1); // Can't find an error on the first step
  assert (maxNumStepsToFindError < 0xFFFFFFFFU); // Prevent overflows
  assert (minNumStepsToFindError <= maxNumStepsToFindError || maxNumStepsToFindError == 0);

  makeRegMap(origRegisterMap, "");
  makeRegMapZero(newRegisterMap);
  isValidIp = false_();
  validIPs.clear();

  // Step counter that increments every step and saturates at max + 1.
  Comp stepCount = problem.build_var(32, "stepCount_saturating_at_" + boost::lexical_cast<std::string>(maxNumStepsToFindError + 1));
  addNext(stepCount, ite(problem.build_op_eq(stepCount, number<32>(maxNumStepsToFindError + 1)), number<32>(maxNumStepsToFindError + 1), problem.build_op_inc(stepCount)));
  resetState = problem.build_op_eq(stepCount, zero(32));
  errorsEnabled =
    problem.build_op_and(
      problem.build_op_ugte(stepCount, number<32>(minNumStepsToFindError)),
      (maxNumStepsToFindError == 0 ?
       true_() :
       problem.build_op_ulte(stepCount, number<32>(maxNumStepsToFindError))));

  // Entry address of every function.
  {
    vector<SgNode*> functions = NodeQuery::querySubTree(proj, V_SgAsmFunction);
    for (size_t i = 0; i < functions.size(); ++i) {
      functionStarts.push_back(isSgAsmFunction(functions[i])->get_address());
      // fprintf(stderr, "functionStarts 0x%"PRIx64"\n", isSgAsmFunction(functions[i])->get_address());
    }
  }

  // Address of every block whose first statement is an x86 instruction.
  {
    vector<SgNode*> blocks = NodeQuery::querySubTree(proj, V_SgAsmBlock);
    for (size_t i = 0; i < blocks.size(); ++i) {
      SgAsmBlock* b = isSgAsmBlock(blocks[i]);
      if (!b->get_statementList().empty() && isSgAsmx86Instruction(b->get_statementList().front())) {
        blockStarts.push_back(b->get_address());
        // fprintf(stderr, "blockStarts 0x%"PRIx64"\n", b->get_address());
      }
    }
  }

  // One pass over all x86 instructions fills both validIPs (every
  // instruction address) and returnPoints (the address following each call).
  // The two vectors are independent, so a single AST query replaces the two
  // identical queries that were issued before.
  {
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmx86Instruction);
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmx86Instruction* insn = isSgAsmx86Instruction(instructions[i]);
      validIPs.push_back(insn->get_address());
      if (insn->get_kind() == x86_call) {
        returnPoints.push_back(insn->get_address() + insn->get_raw_bytes().size());
        // fprintf(stderr, "returnPoints 0x%"PRIx64"\n", insn->get_address() + insn->get_raw_bytes().size());
      }
    }
  }
}
// Instruction callback.  Once the instruction at address `when` has been
// seen, every subsequent x86 instruction is fed through `semantics` and the
// memory-cell count plus SMT solver statistics are written to the thread's
// TRACE_MISC stream.  Returns `enabled` unchanged.
virtual bool operator()(bool enabled, const Args &args) /*overrides*/ {
    if (!enabled)
        return enabled;

    // Arm the tracer the first time the trigger address is reached.
    if (!triggered && args.insn->get_address()==when) {
        triggered = true;
        initialize_state(args.thread);
    }

    SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.insn);
    if (!triggered || insn == NULL)
        return enabled;

    RTS_Message *m = args.thread->tracing(TRACE_MISC);
    m->mesg("%s: %s", name, unparseInstructionWithAddress(insn).c_str());

    // The instruction pointer is set explicitly before the instruction is
    // processed.
    policy.get_state().registers.ip = SymbolicSemantics::ValueType<32>(insn->get_address());
    semantics.processInstruction(insn);

    // Report solver statistics for this instruction, then reset them.
    SMTSolver::Stats smt_stats = yices.get_stats();
    m->mesg("%s: mem-cell list size: %zu elements\n",
            name, policy.get_state().memory.cell_list.size());
    m->mesg("%s: SMT stats: ncalls=%zu, input=%zu bytes, output=%zu bytes\n",
            name, smt_stats.ncalls, smt_stats.input_size, smt_stats.output_size);
    yices.reset_stats();

#if 0
    std::ostringstream ss;
    ss <<policy;
    m->mesg("%s", ss.str().c_str());
#endif

    return enabled;
}
// Counts x86 instructions by kind.
//
// The first SgBinaryComposite encountered is remembered in `file`.  For every
// x86 instruction node the counter stored in `instMap` under the instruction
// kind's name (stringified with the "x86_" argument) is incremented; a kind
// seen for the first time starts at 1.  All other nodes are ignored.
void
CompassAnalyses::BinPrintAsmInstruction::Traversal::
visit(SgNode* n) {
  if (isSgBinaryComposite(n) && file==NULL)
    file = isSgBinaryComposite(n);

  SgAsmx86Instruction* binInst = isSgAsmx86Instruction(n);
  if (binInst==NULL)
    return;

  string className = rose::stringifyX86InstructionKind(binInst->get_kind(), "x86_");

  // operator[] value-initialises a missing counter to 0, so this yields 1 for
  // a new kind and previous+1 otherwise.
  ++instMap[className];
} //End of the visit function.
// Replace the comparator defined below? bool operator<(const Definition& other) const { if (definer == NULL && other.definer != NULL) return true; if (definer != NULL && other.definer == NULL) return false; if (definer == NULL && other.definer == NULL) return (access < other.access); if (definer->get_address() < other.definer->get_address()) return true; if (definer->get_address() > other.definer->get_address()) return false; return (access < other.access); }
// Determines the instruction that control flows to after `instx`.
//
// instx            : instruction to resolve; anything that is not an x86
//                    instruction yields NULL.
// hasStopCondition : when true, no fall-through lookup is attempted.
//
// The control-flow out edge recorded in g_algo->info is used when present.
// If there is none and no stop condition applies, the instruction located
// directly behind `instx` (address + encoded size) is looked up in
// rememberInstructions.  Returns the resolved instruction or NULL.
SgAsmInstruction*
RoseBin_FlowAnalysis::resolveFunction(SgAsmInstruction* instx, bool hasStopCondition) {
  SgAsmx86Instruction* inst = isSgAsmx86Instruction(instx);
  if (inst==NULL) return NULL;

  ROSE_ASSERT(g_algo->info);
  SgAsmInstruction* nextFlow = inst->cfgBinFlowOutEdge(g_algo->info);

  // if current node is not a controltransfer node (e.g. jmp, ret, ...),
  // then there should be a flow to a next node
  if (nextFlow==NULL && hasStopCondition==false) {
    // in this case, we have a ordinary node that should be connected to the next block
    // now lets find the next block and create a function for these two blocks
    //
    // A separate `else if` for x86_jmp used to follow this branch.  Its
    // condition was a strict subset of this one, so it could never execute
    // and has been removed; jmp instructions without an out edge therefore
    // take this fall-through path exactly as before.
    uint64_t addrInst = inst->get_address();
    uint64_t size = (inst->get_raw_bytes()).size();
    uint64_t nextAddr = addrInst+size;
    rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::const_iterator it2 =
      rememberInstructions.find(nextAddr);
    if (it2!=rememberInstructions.end()) {
      // found the next instruction
      nextFlow = isSgAsmInstruction(it2->second);
      //if (RoseBin_support::DEBUG_MODE())
      // cout << " function resolution: resolving next : " << nextFlow->class_name() << " this : "
      //      << unparser->unparseInstruction(inst) << endl;
    }
  } else {
    // NOTE(review): this branch is also taken when nextFlow was resolved
    // successfully, so the warning below is printed for resolved
    // instructions as well -- confirm whether that is intended.
    if (RoseBin_support::DEBUG_MODE())
      if (!(inst->get_kind() == x86_nop || inst->get_kind() == x86_ret))
        cerr << " WARNING: function resolution:: cant resolve : " << inst->class_name()
             << "(" << unparseInstruction(inst) << ")" << endl;
  }
  return nextFlow;
}
// Flags reads of memory locations that were not written earlier in the same
// function.
//
// * A function node resets the per-function write/read bookkeeping.
// * A `mov` whose left operand is a memory reference records the evaluated
//   address in memoryWrites.
// * A `mov reg, mem` whose memory operand involves the BP register is
//   reported in `result` (once per address) when no write to that address
//   has been recorded.
// All other nodes are ignored.
void
InitPointerToNull::visit(SgNode* node) {
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
    return;
  }

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
  if (inst == NULL || inst->get_kind() != x86_mov)
    return;

  // Locate the enclosing function.  With a project the instruction sits in
  // a block below the function; otherwise (we run IDA, this is different)
  // the instruction's parent is looked at directly.
  SgNode* instBlock = NULL;
  if (project)
    instBlock = isSgAsmBlock(inst->get_parent());
  else
    instBlock = inst;
  if (instBlock==NULL)
    return;
  SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
  if (instFunc==NULL)
    return;

  // we have found a mov instruction
  // we need to check if it is a
  //   mov mem, (value or reg)   // assignment of variable   // forgot mov mem, mem
  // or we find a
  //   mov reg, mem              // usage of variable
  // make sure a variable is assigned before used
  SgAsmOperandList* ops = inst->get_operandList();
  SgAsmExpressionPtrList& opsList = ops->get_operands();

  SgAsmMemoryReferenceExpression* memL=NULL;
  SgAsmMemoryReferenceExpression* memR=NULL;
  SgAsmRegisterReferenceExpression* regL=NULL;
  SgAsmRegisterReferenceExpression* regR=NULL;
  SgAsmValueExpression* Val = NULL;

  // The first operand is the left-hand side; any operand after it is
  // classified as the right-hand side.
  for (size_t i = 0; i < opsList.size(); ++i) {
    SgAsmExpression* exp = opsList[i];
    ROSE_ASSERT(exp);
    if (i == 0) {
      memL = isSgAsmMemoryReferenceExpression(exp);
      regL = isSgAsmRegisterReferenceExpression(exp);
    } else {
      memR = isSgAsmMemoryReferenceExpression(exp);
      regR = isSgAsmRegisterReferenceExpression(exp);
      Val = isSgAsmValueExpression(exp);
    }
  }

  if (memL && (regR || Val || memR)) {
    // could be assignment to address
    rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memL);
    // apparently the reference to memory does not always have to be BP but
    // can also be IP if it is a static variable. How will we handle global variables?
    //bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memL);
    //if (containsBP) {
    // this is memory write with offset to BP
    // remember this memory location as a write
    if (debug)
      cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    memoryWrites.insert(addr);
    //}
    return;
  }

  if (!(regL && memR))
    return;

  // could be usage of address
  rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memR);
  bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memR);
  if (!containsBP)
    return;

  // this is memory read with offset to BP
  // did we see a write for this? If not, it is not initialized!
  if (memoryWrites.find(addr) != memoryWrites.end()) {
    // found write, everything is good
    if (debug)
      cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    return;
  }

  if (memoryRead.find(addr) != memoryRead.end())
    return;  // found this case before; report each address only once

  if (debug)
    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;

  // Use the function resolved above.  Re-deriving it through
  // isSgAsmBlock(inst->get_parent()) produced an empty name whenever the
  // instruction's parent is not a block (the no-project layout), even though
  // instFunc had already been found.
  string funcname = instFunc->get_name();
  string res = "Possibly uninitialized variable: ";
  res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
    " <"+inst->get_comment()+"> in function: "+funcname;
  result[inst]= res;
  memoryRead.insert(addr);
}
// Records every x86 instruction that belongs to a function which is not
// marked FUNC_LEFTOVERS, keyed by the instruction's address.  Nodes that are
// not x86 instructions are skipped.
void visit(SgNode *node) {
    SgAsmx86Instruction *insn = isSgAsmx86Instruction(node);
    // Bail out before touching `insn`: previously a NULL pointer was handed
    // to getEnclosingNode() and correctness depended on that helper
    // tolerating it.
    if (insn == NULL)
        return;

    SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn);
    if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
        insert(std::make_pair(insn->get_address(), insn));
}
int64_t RoseBin_DataFlowAbstract::trackValueForRegister( SgGraphNode* node, std::pair<X86RegisterClass, int> codeSearch, bool& cantTrack, SgAsmx86RegisterReferenceExpression* refExpr_rightHand) { int64_t value = 0xffffffff; if (RoseBin_support::DEBUG_MODE()) cout << " ........ trying to resolve value for register :: " << codeSearch.first << "." << codeSearch.second << endl; SgAsmx86Instruction* inst = isSgAsmx86Instruction(node->get_SgNode()); ROSE_ASSERT(inst); std::pair<X86RegisterClass, int> code = std::make_pair((X86RegisterClass)refExpr_rightHand->get_descriptor().get_major(), refExpr_rightHand->get_descriptor().get_minor()); // iterate up and find an assignment to this register codeSearch i.e. instr codeSearch, esi bool condInst = RoseBin_support::isConditionalInstruction(inst); bool condInstFlag = RoseBin_support::isConditionalFlagInstruction(inst); if (condInstFlag==false) { // the instruction is not dependent on a flag if (condInst==false) { // the instruction is not dependent on a value in one of its operands // easiest track SgGraphNode* previous = getPredecessor(node); /* vector <SgGraphNode*> vec; vizzGraph->getPredecessors(node, vec); if (vec.size()==1) { // found one predecessor SgGraphNode* previous = vec.back(); ROSE_ASSERT(previous); string name = vizzGraph->getProperty(SgGraph::name, previous); if (RoseBin_support::DEBUG_MODE()) cout << " tracking recursive var " << name << endl; value = trackValueForRegister(previous, code, cantTrack, refExpr_rightHand); } else if (vec.size()>1) { cerr << " Tracking:: Problem, we have more than one predecessor for a node... cant track this " << endl; exit(0); } */ value = trackValueForRegister(previous, code, cantTrack, refExpr_rightHand); } else { // the instruction is dependent on a value in one of its operands // e.g. cmovz eax, esi (moved only if esi=0); // need to track the value of esi to track the value of eax .. more complicated! 
int addr = inst->get_address(); if (RoseBin_support::DEBUG_MODE()) { cout << " ERROR ------------------------------------------ " << endl; cout << RoseBin_support::HexToString(addr) << " " << inst->class_name() << " -- CANT resolve the value of the register because it depends on CONDITION -- code " << code.first << "." << code.second << endl; } cantTrack =true; } } else { // the instruction is dependent on a flag int addr = inst->get_address(); if (RoseBin_support::DEBUG_MODE()) { cout << " ERROR ------------------------------------------ " << endl; cout << RoseBin_support::HexToString(addr) << " " << inst->class_name() << " -- CANT resolve the value of the register because it depends on FLAGS -- code " << code.first << "." << code.second << endl; } cantTrack =true; } return value; }
void RoseBin_FlowAnalysis::process_jumps() { if (RoseBin_support::DEBUG_MODE()) cerr << "\n >>>>>>>>> processing jumps ... " << endl; rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it; for (it=rememberInstructions.begin();it!=rememberInstructions.end();++it) { SgAsmx86Instruction* inst = isSgAsmx86Instruction(it->second); if (inst->get_kind() == x86_call) { //cerr << "Found call at " << std::hex << inst->get_address() << endl; SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst)); if (target) { //cerr << "Target is " << std::hex << target->get_address() << endl; // inst->get_targets().push_back(target); // we set the sources (for each node) ROSE_ASSERT(g_algo->info); g_algo->info->incomingEdges[target].insert(inst->get_address()); // tps: changed this algorithm so that it runs in // linear time! ROSE_ASSERT (target->get_parent()); if (target->get_parent()) { // ROSE_ASSERT(target->get_parent()); SgAsmNode* b_b = target; if (!db) b_b = isSgAsmNode(target->get_parent()); ROSE_ASSERT(b_b); SgAsmFunction* b_func = isSgAsmFunction(b_b->get_parent()); if (b_func) { // (16/Oct/07) tps: this is tricky, it appears that sometimes the target can // be just a jmp to a new location, so we should forward this information to the correct // function. // Therefore we need to check if the current function has a return statement. // If not, we want to forward this information. 
if (target->get_kind() == x86_jmp) { //cerr << " >>>>>>>> found a jmp target - number of children: " << b_func->get_traversalSuccessorContainer().size() << endl; if (b_func->get_numberOfTraversalSuccessors()==1) { SgAsmx86Instruction* target2 = isSgAsmx86Instruction(process_jumps_get_target(inst)); if (target2) { b_b = target2; if (!db) b_b = isSgAsmNode(target2->get_parent()); b_func = isSgAsmFunction(b_b->get_parent()); } } } if (inst->get_parent()) { //cerr << "Inst has a parent" << endl; if (inst->get_comment()=="") inst->set_comment(""+b_func->get_name()); ROSE_ASSERT(g_algo->info); SgAsmInstruction* inst_after = g_algo->info->getInstructionAtAddress(inst->get_address() + inst->get_raw_bytes().size()); // inst->cfgBinFlowOutEdge(info); if (inst_after) { //cerr << "Added dest " << std::hex << isSgAsmStatement(inst_after)->get_address() << " for function" << endl; b_func->append_dest(isSgAsmStatement(inst_after)); } } } else { if (RoseBin_support::DEBUG_MODE()) cerr << " NO FUNCTION DETECTED ABOVE BLOCK . " << endl; } } else { if (RoseBin_support::DEBUG_MODE()) cerr << " WARNING :: process_jumps: target has no parent ... i.e. no FunctionDeclaration to it " << target->class_name() << endl; } } else { if (inst) if (RoseBin_support::DEBUG_MODE()) cerr << " WARNING :: process_jumps: No target found for node " << RoseBin_support::HexToString(inst->get_address()) << " " << inst->get_mnemonic() << endl; } } else { // might be a jmp SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst)); if (target) { // inst->get_targets().push_back(target); // we set the sources (for each node) ROSE_ASSERT(g_algo->info); g_algo->info->incomingEdges[target].insert(inst->get_address()); } } } //cerr << "\n >>>>>>>>> processing jumps ... done. " << endl; // cerr << "\n >>>>>>>>> resolving RET jumps ... 
" << endl; rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it2; for (it2=rememberInstructions.begin();it2!=rememberInstructions.end();++it2) { //int id = it2->first; SgAsmx86Instruction* target = isSgAsmx86Instruction(it2->second); ROSE_ASSERT (target); #if 1 if (target->get_kind() == x86_ret) { SgAsmNode* b_b = target; if (!db) b_b = isSgAsmNode(target->get_parent()); SgAsmFunction* parent = isSgAsmFunction(b_b->get_parent()); if (parent) { //ROSE_ASSERT(parent); std::vector <SgAsmStatement*> dest_list = parent->get_dest(); for (size_t i = 0; i < dest_list.size(); ++i) { ROSE_ASSERT (isSgAsmInstruction(dest_list[i])); //cerr << "Adding ret target " << std::hex << dest_list[i]->get_address() << " to " << std::hex << target->get_address() << endl; //info->indirectJumpAndReturnTargets[target].insert(dest_list[i]->get_address()); ROSE_ASSERT(g_algo->info); g_algo->info->incomingEdges[isSgAsmInstruction(dest_list[i])].insert(target->get_address()); } std::vector <SgAsmStatement*>::iterator it3 = dest_list.begin(); for (; it3!=dest_list.end();++it3) { SgAsmInstruction* dest = isSgAsmInstruction(*it3); if (dest) { dest->append_sources(target); //cerr << " appending source to " << dest->get_address() << " target: " << target->get_address() << endl; } } // for } else { // if parent if (RoseBin_support::DEBUG_MODE()) cerr << " ERROR :: RET jumps :: no parent found for ret : " << target->class_name() << endl; //exit (0); } } // if ret #endif } if (RoseBin_support::DEBUG_MODE()) cerr << " >>>>>>>>> resolving RET jumps ... done." << endl; }
// The actual analysis, triggered when we reach the specified execution address... virtual bool operator()(bool enabled, const Args &args) try { using namespace BinaryAnalysis::InstructionSemantics; static const char *name = "Analysis"; using namespace InsnSemanticsExpr; if (enabled && args.insn->get_address()==trigger_addr) { RTS_Message *trace = args.thread->tracing(TRACE_MISC); trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr); // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with // "--with-yices=/full/path/to/yices/installation". If not, you'll get a failed assertion when ROSE tries to use // the solver. YicesSolver smt_solver; smt_solver.set_linkage(YicesSolver::LM_EXECUTABLE); //smt_solver.set_debug(stdout); // We deactive the simulator while we're doing this analysis. If the simulator remains activated, then the SIGCHLD // that are generated from running the Yices executable will be sent to the specimen. That probably wouldn't cause // problems for the specimen, but the messages are annoying. args.thread->get_process()->get_simulator()->deactivate(); // Create the policy that holds the analysis state which is modified by each instruction. Then plug the policy // into the X86InstructionSemantics to which we'll feed each instruction. SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver); X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>, SymbolicSemantics::ValueType> semantics(policy); // The top of the stack contains the (unknown) return address. The value above that (in memory) is the address of // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a // concrete value). The contents of the buffer are unknown. 
Process memory is maintained by the policy we created // above, so none of these memory writes are actually affecting the specimen's state in the simulator. policy.writeRegister("esp", policy.number<32>(4000)); SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4)); SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4)); policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_()); // ptr to buffer policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_()); // bytes in buffer policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr)); // branch to analysis address #if 1 { // This is a kludge. If the first instruction is an indirect JMP then assume we're executing through a dynamic // linker thunk and execute the instruction concretely to advance the instruction pointer. SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(analysis_addr)); if (x86_jmp==insn->get_kind()) { VirtualMachineSemantics::Policy<VirtualMachineSemantics::State, VirtualMachineSemantics::ValueType> p; X86InstructionSemantics<VirtualMachineSemantics::Policy<VirtualMachineSemantics::State, VirtualMachineSemantics::ValueType>, VirtualMachineSemantics::ValueType> sem(p); p.set_map(args.thread->get_process()->get_memory()); // won't be thread safe sem.processInstruction(insn); policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value())); trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64, name, analysis_addr, p.readRegister<32>("eip").known_value()); } } #endif // Run the analysis until we can't figure out what instruction is next. If we set things up correctly, the // simulation will stop when we hit the RET instruction to return from this function. 
size_t nbranches = 0; std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver while (policy.readRegister<32>("eip").is_known()) { uint64_t va = policy.readRegister<32>("eip").known_value(); SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(va)); assert(insn!=NULL); trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str()); semantics.processInstruction(insn); if (policy.readRegister<32>("eip").is_known()) continue; bool complete; std::set<rose_addr_t> succs = insn->get_successors(&complete); if (complete && 2==succs.size()) { if (nbranches>=take_branch.size()) { std::ostringstream s; s<<policy.readRegister<32>("eip"); trace->mesg("%s: EIP = %s", name, s.str().c_str()); trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name); throw this; } // Decide whether we should take the branch or not. bool take = take_branch[nbranches++]; rose_addr_t target = 0; for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) { if ((take && *si!=insn->get_address()+insn->get_size()) || (!take && *si==insn->get_address()+insn->get_size())) target = *si; } assert(target!=0); trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target); // Is this path feasible? We don't really need to check it now; we could wait until the end. InternalNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(), LeafNode::create_integer(32, target)); constraints.push_back(c); // shouldn't really have to do this again if we could save some state if (smt_solver.satisfiable(constraints)) { policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target)); } else { trace->mesg("%s: chosen control flow path is not feasible.", name); break; } } } // Show the value of the EAX register since this is where GCC puts the function's return value. 
If we did things // right, the return value should depend only on the unknown bytes from the beginning of the buffer. SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax"); std::set<InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables(); { std::ostringstream s; s <<name <<": symbolic return value is " <<result <<"\n" <<name <<": return value has " <<vars.size() <<" variables:"; for (std::set<InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) s <<" " <<*vi; s <<"\n"; if (!constraints.empty()) { s <<name <<": path constraints:\n"; for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci) s <<name <<": " <<*ci <<"\n"; } trace->mesg("%s", s.str().c_str()); } // Now give values to those bytes and solve the equation for the result using an SMT solver. if (!result.is_known()) { trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name); std::vector<TreeNodePtr> exprs = constraints; LeafNodePtr result_var = LeafNode::create_variable(32); InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var); exprs.push_back(expr); for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x')); exprs.push_back(expr); } if (smt_solver.satisfiable(exprs)) { LeafNodePtr result_value = smt_solver.get_definition(result_var)->isLeafNode(); if (!result_value) { trace->mesg("%s: evaluation result could not be determined. ERROR!", name); } else if (!result_value->is_known()) { trace->mesg("%s: evaluation result is not constant. ERROR!", name); } else { trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value()); } } else { trace->mesg("%s: expression is not satisfiable.", name); } } // Now try going the other direction. 
Set the return expression to a value and try to discover what two bytes // would satisfy the equation. if (!result.is_known()) { trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name); std::vector<TreeNodePtr> exprs = constraints; InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), LeafNode::create_integer(32, 0xff015e7c)); exprs.push_back(expr); if (smt_solver.satisfiable(exprs)) { for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { LeafNodePtr var_val = smt_solver.get_definition(*vi)->isLeafNode(); if (var_val && var_val->is_known()) trace->mesg("%s: v%"PRIu64" = %"PRIu64" %c", name, (*vi)->get_name(), var_val->get_value(), isprint(var_val->get_value())?(char)var_val->get_value():' '); } } else { trace->mesg("%s: expression is not satisfiable. No solutions.", name); } } // Reactivate the simulator in case we want to continue simulating. args.thread->get_process()->get_simulator()->activate(); throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator } return enabled; } catch (const Analysis*) { args.thread->get_process()->get_simulator()->activate(); throw; }
// Stores every x86 instruction node in `insns`, keyed by its address.  A
// later instruction at the same address replaces the earlier entry; nodes of
// any other kind are ignored.
void visit(SgNode *node) {
    SgAsmx86Instruction *x86 = isSgAsmx86Instruction(node);
    if (x86 == NULL)
        return;
    insns[x86->get_address()] = x86;
}