/* Assembles all instructions of an interpretation.  ROSE allows blocks to be non-contiguous (i.e., an unconditional jump can
 * appear in the middle of a basic block).  However, we need to disassemble contiguous instructions.
 *
 * Every instruction gathered by InstructionCollector for @p interp is handed to an Assembler created for that same
 * interpretation.  A failure to assemble one instruction is reported on std::cerr and, if assembler debugging was not
 * already enabled, the instruction is assembled a second time with debugging output sent to stderr so the reason is
 * visible.  Failures are not fatal: the loop continues with the next instruction.  The number of successfully assembled
 * instructions is printed on std::cout when done.
 *
 * The machine code is currently not written back to memory (see the disabled section at the end of the loop body). */
static void
assemble_all(SgAsmInterpretation *interp)
{
    size_t nassembled = 0;
    Assembler *assembler = Assembler::create(interp);
    ROSE_ASSERT(assembler!=NULL);
    InstructionCollector collector(interp);
    for (Disassembler::InstructionMap::iterator ii=collector.insns.begin(); ii!=collector.insns.end(); ++ii) {
        rose_addr_t original_va = ii->first;

        /* The new_va is the virtual address of the instruction now that we may have moved it to a new location in memory.
         * We're leaving this implementation for later. For now, just assume that instructions don't move in memory. */
        rose_addr_t new_va = original_va;

        SgAsmx86Instruction *insn = isSgAsmx86Instruction(ii->second);
        ROSE_ASSERT(insn!=NULL);

        SgUnsignedCharList machine_code;
        try {
            insn->set_address(new_va);
            machine_code = assembler->assembleOne(insn);
            ROSE_ASSERT(!machine_code.empty());
            ++nassembled;
        } catch (const Assembler::Exception &e) {
            /* Report the address carried by the exception.  NOTE(review): if an exception can be raised without an
             * associated instruction then e.insn would be null, so fall back to the address we were assembling. */
            rose_addr_t failed_va = e.insn ? e.insn->get_address() : new_va;
            std::cerr <<"assembly failed at " <<StringUtility::addrToString(failed_va)
                      <<": " <<e.what() <<std::endl;

            /* Assemble the same instruction again with debugging turned on, purely for the diagnostic output.  The
             * second failure is expected and intentionally ignored. */
            if (!assembler->get_debug()) {
                assembler->set_debug(stderr);
                try {
                    assembler->assembleOne(insn);
                } catch (...) {
                    /*void*/
                }
                /* Turn debugging back off.  A null stream is passed explicitly; passing "false" only worked because
                 * it used to be accepted as a null pointer constant, which is no longer true as of C++11. */
                assembler->set_debug(NULL);
            }
            //return;
        }

#if 0 /* Don't worry about writing the instruction back out to the section. [RPM 2011-08-23] */
        /* We don't handle the case where an instruction grows because that could cause us to require that the section
         * containing the instruction grows, which opens a whole can of worms. */
        ROSE_ASSERT(machine_code.size() <= insn->get_size());

        /* We're using the same memory map as what was used when we loaded the binary and disassembled it. Therefore, the
         * machine code that we're writing back needs to fall within those same areas of the virtual address space: we cannot
         * write past the end of mapped memory, nor can we write to the space (if any) between mapped memory chunks. */
        size_t nwritten = interp->get_map()->write(&(machine_code[0]), new_va, machine_code.size(), MemoryMap::MM_PROT_NONE);
        ROSE_ASSERT(nwritten==machine_code.size());
#endif
    }

    std::cout <<"Assembled " <<nassembled <<" instruction" <<(1==nassembled?"":"s") <<"\n";
    delete assembler;
}
// The actual analysis, triggered when we reach the specified execution address... virtual bool operator()(bool enabled, const Args &args) try { using namespace BinaryAnalysis::InstructionSemantics; static const char *name = "Analysis"; using namespace InsnSemanticsExpr; if (enabled && args.insn->get_address()==trigger_addr) { RTS_Message *trace = args.thread->tracing(TRACE_MISC); trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr); // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with // "--with-yices=/full/path/to/yices/installation". If not, you'll get a failed assertion when ROSE tries to use // the solver. YicesSolver smt_solver; smt_solver.set_linkage(YicesSolver::LM_EXECUTABLE); //smt_solver.set_debug(stdout); // We deactive the simulator while we're doing this analysis. If the simulator remains activated, then the SIGCHLD // that are generated from running the Yices executable will be sent to the specimen. That probably wouldn't cause // problems for the specimen, but the messages are annoying. args.thread->get_process()->get_simulator()->deactivate(); // Create the policy that holds the analysis state which is modified by each instruction. Then plug the policy // into the X86InstructionSemantics to which we'll feed each instruction. SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver); X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>, SymbolicSemantics::ValueType> semantics(policy); // The top of the stack contains the (unknown) return address. The value above that (in memory) is the address of // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a // concrete value). The contents of the buffer are unknown. 
Process memory is maintained by the policy we created // above, so none of these memory writes are actually affecting the specimen's state in the simulator. policy.writeRegister("esp", policy.number<32>(4000)); SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4)); SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4)); policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_()); // ptr to buffer policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_()); // bytes in buffer policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr)); // branch to analysis address #if 1 { // This is a kludge. If the first instruction is an indirect JMP then assume we're executing through a dynamic // linker thunk and execute the instruction concretely to advance the instruction pointer. SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(analysis_addr)); if (x86_jmp==insn->get_kind()) { VirtualMachineSemantics::Policy<VirtualMachineSemantics::State, VirtualMachineSemantics::ValueType> p; X86InstructionSemantics<VirtualMachineSemantics::Policy<VirtualMachineSemantics::State, VirtualMachineSemantics::ValueType>, VirtualMachineSemantics::ValueType> sem(p); p.set_map(args.thread->get_process()->get_memory()); // won't be thread safe sem.processInstruction(insn); policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value())); trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64, name, analysis_addr, p.readRegister<32>("eip").known_value()); } } #endif // Run the analysis until we can't figure out what instruction is next. If we set things up correctly, the // simulation will stop when we hit the RET instruction to return from this function. 
size_t nbranches = 0; std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver while (policy.readRegister<32>("eip").is_known()) { uint64_t va = policy.readRegister<32>("eip").known_value(); SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(va)); assert(insn!=NULL); trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str()); semantics.processInstruction(insn); if (policy.readRegister<32>("eip").is_known()) continue; bool complete; std::set<rose_addr_t> succs = insn->get_successors(&complete); if (complete && 2==succs.size()) { if (nbranches>=take_branch.size()) { std::ostringstream s; s<<policy.readRegister<32>("eip"); trace->mesg("%s: EIP = %s", name, s.str().c_str()); trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name); throw this; } // Decide whether we should take the branch or not. bool take = take_branch[nbranches++]; rose_addr_t target = 0; for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) { if ((take && *si!=insn->get_address()+insn->get_size()) || (!take && *si==insn->get_address()+insn->get_size())) target = *si; } assert(target!=0); trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target); // Is this path feasible? We don't really need to check it now; we could wait until the end. InternalNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(), LeafNode::create_integer(32, target)); constraints.push_back(c); // shouldn't really have to do this again if we could save some state if (smt_solver.satisfiable(constraints)) { policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target)); } else { trace->mesg("%s: chosen control flow path is not feasible.", name); break; } } } // Show the value of the EAX register since this is where GCC puts the function's return value. 
If we did things // right, the return value should depend only on the unknown bytes from the beginning of the buffer. SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax"); std::set<InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables(); { std::ostringstream s; s <<name <<": symbolic return value is " <<result <<"\n" <<name <<": return value has " <<vars.size() <<" variables:"; for (std::set<InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) s <<" " <<*vi; s <<"\n"; if (!constraints.empty()) { s <<name <<": path constraints:\n"; for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci) s <<name <<": " <<*ci <<"\n"; } trace->mesg("%s", s.str().c_str()); } // Now give values to those bytes and solve the equation for the result using an SMT solver. if (!result.is_known()) { trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name); std::vector<TreeNodePtr> exprs = constraints; LeafNodePtr result_var = LeafNode::create_variable(32); InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var); exprs.push_back(expr); for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x')); exprs.push_back(expr); } if (smt_solver.satisfiable(exprs)) { LeafNodePtr result_value = smt_solver.get_definition(result_var)->isLeafNode(); if (!result_value) { trace->mesg("%s: evaluation result could not be determined. ERROR!", name); } else if (!result_value->is_known()) { trace->mesg("%s: evaluation result is not constant. ERROR!", name); } else { trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value()); } } else { trace->mesg("%s: expression is not satisfiable.", name); } } // Now try going the other direction. 
Set the return expression to a value and try to discover what two bytes // would satisfy the equation. if (!result.is_known()) { trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name); std::vector<TreeNodePtr> exprs = constraints; InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), LeafNode::create_integer(32, 0xff015e7c)); exprs.push_back(expr); if (smt_solver.satisfiable(exprs)) { for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { LeafNodePtr var_val = smt_solver.get_definition(*vi)->isLeafNode(); if (var_val && var_val->is_known()) trace->mesg("%s: v%"PRIu64" = %"PRIu64" %c", name, (*vi)->get_name(), var_val->get_value(), isprint(var_val->get_value())?(char)var_val->get_value():' '); } } else { trace->mesg("%s: expression is not satisfiable. No solutions.", name); } } // Reactivate the simulator in case we want to continue simulating. args.thread->get_process()->get_simulator()->activate(); throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator } return enabled; } catch (const Analysis*) { args.thread->get_process()->get_simulator()->activate(); throw; }