void RiscOperators::dumpState() { Sawyer::Message::Stream out(thread_->tracing(TRACE_STATE)); out.enable(); out <<"Semantic state for thread " <<thread_->get_tid() <<":\n"; if (currentInstruction()) { out <<" instruction #" <<nInsns() <<" at " <<unparseInstructionWithAddress(currentInstruction()) <<"\n"; } else { out <<" processed " <<StringUtility::plural(nInsns(), "instructions") <<"\n"; } out <<" registers:\n"; BaseSemantics::Formatter format; format.set_line_prefix(" "); out <<(*currentState()->registerState()+format); out <<" memory:\n"; thread_->get_process()->mem_showmap(out, "memory:", " "); if (ARCH_X86 == architecture_) { out <<" segments:\n"; RegisterNames regNames(currentState()->registerState()->get_register_dictionary()); BOOST_FOREACH (const SegmentInfoMap::Node &node, segmentInfo_.nodes()) { RegisterDescriptor segreg(x86_regclass_segment, node.key(), 0, 16); out <<" " <<regNames(segreg) <<": base=" <<StringUtility::addrToString(node.value().base) <<" limit=" <<StringUtility::addrToString(node.value().limit) <<" present=" <<(node.value().present?"yes":"no") <<"\n"; } }
std::string NoOperation::StateNormalizer::toString(const BaseSemantics::DispatcherPtr &cpu, const BaseSemantics::StatePtr &state_) { BaseSemantics::StatePtr state = state_; BaseSemantics::RiscOperatorsPtr ops = cpu->get_operators(); if (!state) return ""; bool isCloned = false; // do we have our own copy of the state? // If possible and appropriate, remove the instruction pointer register const RegisterDescriptor regIp = cpu->instructionPointerRegister(); BaseSemantics::RegisterStateGenericPtr rstate = BaseSemantics::RegisterStateGeneric::promote(state->registerState()); if (rstate && rstate->is_partly_stored(regIp)) { BaseSemantics::SValuePtr ip = ops->readRegister(cpu->instructionPointerRegister()); if (ip->is_number()) { state = state->clone(); isCloned = true; rstate = BaseSemantics::RegisterStateGeneric::promote(state->registerState()); rstate->erase_register(regIp, ops.get()); } } // Get the memory state, cloning the state if not done so above. BaseSemantics::MemoryCellStatePtr mem = boost::dynamic_pointer_cast<BaseSemantics::MemoryCellState>(state->memoryState()); if (mem && !isCloned) { state = state->clone(); isCloned = true; mem = BaseSemantics::MemoryCellState::promote(state->memoryState()); } // Erase memory that has never been written (i.e., cells that sprang into existence by reading an address) of which appears // to have been recently popped from the stack. CellErasurePredicate predicate(ops, ops->readRegister(cpu->stackPointerRegister()), ignorePoppedMemory_); if (mem) mem->eraseMatchingCells(predicate); BaseSemantics::Formatter fmt; fmt.set_show_latest_writers(false); fmt.set_show_properties(false); std::ostringstream ss; ss <<(*state+fmt); return ss.str(); }
/* Analyze a single interpretation a block at a time */ static void analyze_interp(SgAsmInterpretation *interp) { /* Get the set of all instructions except instructions that are part of left-over blocks. */ struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmX86Instruction*> { void visit(SgNode *node) { SgAsmX86Instruction *insn = isSgAsmX86Instruction(node); SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn); if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) insert(std::make_pair(insn->get_address(), insn)); } } insns; insns.traverse(interp, postorder); while (!insns.empty()) { std::cout <<"=====================================================================================\n" <<"=== Starting a new basic block ===\n" <<"=====================================================================================\n"; AllInstructions::iterator si = insns.begin(); SgAsmX86Instruction *insn = si->second; insns.erase(si); BaseSemantics::RiscOperatorsPtr operators = make_ops(); BaseSemantics::Formatter formatter; formatter.set_suppress_initial_values(); formatter.set_show_latest_writers(do_usedef); BaseSemantics::DispatcherPtr dispatcher; if (do_trace) { // Enable RiscOperators tracing, but turn off a bunch of info that makes comparisons with a known good answer // difficult. Sawyer::Message::PrefixPtr prefix = Sawyer::Message::Prefix::instance(); prefix->showProgramName(false); prefix->showThreadId(false); prefix->showElapsedTime(false); prefix->showFacilityName(Sawyer::Message::Prefix::NEVER); prefix->showImportance(false); Sawyer::Message::UnformattedSinkPtr sink = Sawyer::Message::StreamSink::instance(std::cout); sink->prefix(prefix); sink->defaultPropertiesNS().useColor = false; TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators); trace->stream().destination(sink); trace->stream().enable(); dispatcher = DispatcherX86::instance(trace, 32); } else { dispatcher = DispatcherX86::instance(operators, 32); } operators->set_solver(make_solver()); // The fpstatus_top register must have a concrete value if we'll use the x86 floating-point stack (e.g., st(0)) if (const RegisterDescriptor *REG_FPSTATUS_TOP = regdict->lookup("fpstatus_top")) { BaseSemantics::SValuePtr st_top = operators->number_(REG_FPSTATUS_TOP->get_nbits(), 0); operators->writeRegister(*REG_FPSTATUS_TOP, st_top); } #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN BaseSemantics::SValuePtr orig_esp; if (do_test_subst) { // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated // in the state, which is printed in the output, and thus changes the answer. BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large(); orig_esp = operators->readRegister(*regdict->lookup("esp")); std::cout <<"Original state:\n" <<*operators; } #endif /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already * been processed. */ while (1) { /* Analyze current instruction */ std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n"; try { dispatcher->processInstruction(insn); # if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/ show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output # else std::cout <<(*operators + formatter); # endif } catch (const BaseSemantics::Exception &e) { std::cout <<e <<"\n"; } /* Never follow CALL instructions */ if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall) break; /* Get next instruction of this block */ BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip")); if (!ip->is_number()) break; rose_addr_t next_addr = ip->get_number(); si = insns.find(next_addr); if (si==insns.end()) break; insn = si->second; insns.erase(si); } // Test substitution on the symbolic state. #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN if (do_test_subst) { SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp); BaseSemantics::SValuePtr newvar = operators->undefined_(32); newvar->set_comment("frame_pointer"); SymbolicSemantics::SValuePtr to = SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4))); std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n"; SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to); std::cout <<"Substituted state:\n" <<(*operators+formatter); } #endif } }
// Show the register state for BaseSemantics::RegisterStateGeneric in the same format as for RegisterStateX86. This is // for comparison of the two register states when verifying results. It's also close to the format used by the old binary // semantics API. void show_state(const BaseSemantics::RiscOperatorsPtr &ops) { #if SEMANTIC_DOMAIN == MULTI_DOMAIN std::cout <<*ops; return; #endif struct ShowReg { BaseSemantics::RiscOperatorsPtr ops; std::ostream &o; std::string prefix; ShowReg(const BaseSemantics::RiscOperatorsPtr &ops, std::ostream &o, const std::string &prefix) : ops(ops), o(o), prefix(prefix) {} void operator()(const char *name, const char *abbr=NULL) { const RegisterDictionary *regdict = ops->get_state()->get_register_state()->get_register_dictionary(); const RegisterDescriptor *desc = regdict->lookup(name); assert(desc); (*this)(*desc, abbr?abbr:name); } void operator()(const RegisterDescriptor &desc, const char *abbr) { BaseSemantics::RegisterStatePtr regstate = ops->get_state()->get_register_state(); FormatRestorer fmt(o); o <<prefix <<std::setw(8) <<std::left <<abbr <<"= { "; fmt.restore(); BaseSemantics::SValuePtr val = regstate->readRegister(desc, ops.get()); o <<*val <<" }\n"; } void operator()(unsigned majr, unsigned minr, unsigned offset, unsigned nbits, const char *abbr) { (*this)(RegisterDescriptor(majr, minr, offset, nbits), abbr); } } show(ops, std::cout, " "); std::cout <<"registers:\n"; show("eax", "ax"); show("ecx", "cx"); show("edx", "dx"); show("ebx", "bx"); show("esp", "sp"); show("ebp", "bp"); show("esi", "si"); show("edi", "di"); show("es"); show("cs"); show("ss"); show("ds"); show("fs"); show("gs"); show("cf"); show(x86_regclass_flags, 0, 1, 1, "?1"); show("pf"); show(x86_regclass_flags, 0, 3, 1, "?3"); show("af"); show(x86_regclass_flags, 0, 5, 1, "?5"); show("zf"); show("sf"); show("tf"); show("if"); show("df"); show("of"); show(x86_regclass_flags, 0, 12, 1, "iopl0"); show(x86_regclass_flags, 0, 13, 1, "iopl1"); show("nt"); show(x86_regclass_flags, 0, 15, 1, "?15"); show("rf"); show("vm"); show(x86_regclass_flags, 0, 18, 1, "ac"); show(x86_regclass_flags, 0, 19, 1, "vif"); show(x86_regclass_flags, 0, 20, 1, "vip"); show(x86_regclass_flags, 0, 21, 1, "id"); show(x86_regclass_flags, 0, 22, 1, "?22"); show(x86_regclass_flags, 0, 23, 1, "?23"); show(x86_regclass_flags, 0, 24, 1, "?24"); show(x86_regclass_flags, 0, 25, 1, "?25"); show(x86_regclass_flags, 0, 26, 1, "?26"); show(x86_regclass_flags, 0, 27, 1, "?27"); show(x86_regclass_flags, 0, 28, 1, "?28"); show(x86_regclass_flags, 0, 29, 1, "?29"); show(x86_regclass_flags, 0, 30, 1, "?30"); show(x86_regclass_flags, 0, 31, 1, "?31"); show("eip", "ip"); BaseSemantics::Formatter memfmt; memfmt.set_line_prefix(" "); std::cout <<"memory:\n"; ops->get_state()->print_memory(std::cout, memfmt); }
/* Analyze a single interpretation a block at a time */ static void analyze_interp(SgAsmInterpretation *interp) { /* Get the set of all instructions except instructions that are part of left-over blocks. */ struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmx86Instruction*> { void visit(SgNode *node) { SgAsmx86Instruction *insn = isSgAsmx86Instruction(node); SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn); if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) insert(std::make_pair(insn->get_address(), insn)); } } insns; insns.traverse(interp, postorder); while (!insns.empty()) { std::cout <<"=====================================================================================\n" <<"=== Starting a new basic block ===\n" <<"=====================================================================================\n"; AllInstructions::iterator si = insns.begin(); SgAsmx86Instruction *insn = si->second; insns.erase(si); #if SEMANTIC_API == NEW_API BaseSemantics::RiscOperatorsPtr operators = make_ops(); BaseSemantics::Formatter formatter; formatter.set_suppress_initial_values(); BaseSemantics::DispatcherPtr dispatcher; if (do_trace) { TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators); trace->set_stream(stdout); dispatcher = DispatcherX86::instance(trace); } else { dispatcher = DispatcherX86::instance(operators); } operators->set_solver(make_solver()); #else // OLD_API typedef X86InstructionSemantics<MyPolicy, MyValueType> MyDispatcher; MyPolicy operators; MyDispatcher dispatcher(operators); # if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN operators.set_solver(make_solver()); SymbolicSemantics::Formatter formatter; formatter.expr_formatter.do_rename = true; formatter.expr_formatter.add_renames = true; # elif SEMANTIC_DOMAIN != FINDCONST_DOMAIN && SEMANTIC_DOMAIN != FINDCONSTABI_DOMAIN BaseSemantics::Formatter formatter; # endif #endif #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API BaseSemantics::SValuePtr orig_esp; if (do_test_subst) { // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated // in the state, which is printed in the output, and thus changes the answer. BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large(); orig_esp = operators->readRegister(*regdict->lookup("esp")); std::cout <<"Original state:\n" <<*operators; } #endif /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already * been processed. */ while (1) { /* Analyze current instruction */ std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n"; #if SEMANTIC_API == NEW_API try { dispatcher->processInstruction(insn); # if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/ show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output # else std::cout <<(*operators + formatter); # endif } catch (const BaseSemantics::Exception &e) { std::cout <<e <<"\n"; } #else // OLD API try { dispatcher.processInstruction(insn); # if SEMANTIC_DOMAIN == FINDCONST_DOMAIN || SEMANTIC_DOMAIN == FINDCONSTABI_DOMAIN operators.print(std::cout); # else operators.print(std::cout, formatter); # endif } catch (const MyDispatcher::Exception &e) { std::cout <<e <<"\n"; break; # if SEMANTIC_DOMAIN == PARTSYM_DOMAIN } catch (const MyPolicy::Exception &e) { std::cout <<e <<"\n"; break; # endif } catch (const SMTSolver::Exception &e) { std::cout <<e <<" [ "<<unparseInstructionWithAddress(insn) <<"]\n"; break; } #endif /* Never follow CALL instructions */ if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall) break; /* Get next instruction of this block */ #if SEMANTIC_API == NEW_API BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip")); if (!ip->is_number()) break; rose_addr_t next_addr = ip->get_number(); #else // OLD_API # if SEMANTIC_DOMAIN == PARTSYM_DOMAIN || SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN MyValueType<32> ip = operators.get_ip(); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # elif SEMANTIC_DOMAIN == NULL_DOMAIN || SEMANTIC_DOMAIN == INTERVAL_DOMAIN MyValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # elif SEMANTIC_DOMAIN == MULTI_DOMAIN PartialSymbolicSemantics::ValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP) .get_subvalue(MyMultiSemanticsClass::SP0()); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # else if (operators.newIp->get().name) break; rose_addr_t next_addr = operators.newIp->get().offset; # endif #endif si = insns.find(next_addr); if (si==insns.end()) break; insn = si->second; insns.erase(si); } // Test substitution on the symbolic state. #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API if (do_test_subst) { SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp); BaseSemantics::SValuePtr newvar = operators->undefined_(32); newvar->set_comment("frame_pointer"); SymbolicSemantics::SValuePtr to = SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4))); std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n"; SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to); std::cout <<"Substituted state:\n" <<(*operators+formatter); } #endif } }