Пример #1
0
 // Emit a summary through the trace facility: one header line followed by one line for every entry of `seen` whose
 // read count is greater than zero.  Entries that were never read are not listed.
 void show_results() {
     trace->mesg("CrcTable results: the following table entries were read by the specimen:");
     for (size_t entry=0; entry<seen.size(); ++entry) {
         if (!(seen[entry]>0))
             continue;                                   // never read; nothing to report
         const char *plural = 1==seen[entry] ? "" : "s"; // "1 time" versus "N times"
         trace->mesg("CrcTable results:   entry #%zu read %zu time%s", entry, seen[entry], plural);
     }
 }
Пример #2
0
    // Per-instruction callback.  The first time the instruction at address `when` is reached the analysis is armed and
    // initialize_state() is called.  From then on every x86 instruction is traced, the policy's instruction pointer is
    // set to the instruction's address, and the instruction is fed to `semantics`.  After each instruction the size of
    // the memory cell list and the SMT solver statistics are traced and the statistics are reset.  The `enabled` argument
    // is always returned unchanged.
    virtual bool operator()(bool enabled, const Args &args) /*overrides*/ {
        if (!enabled)
            return enabled;

        // One-shot trigger.
        if (!triggered && args.insn->get_address()==when) {
            triggered = true;
            initialize_state(args.thread);
        }

        // Nothing more to do until triggered, or if this is not an x86 instruction.
        SgAsmX86Instruction *x86_insn = isSgAsmX86Instruction(args.insn);
        if (!triggered || !x86_insn)
            return enabled;

        RTS_Message *msg = args.thread->tracing(TRACE_MISC);
        msg->mesg("%s: %s", name, unparseInstructionWithAddress(x86_insn).c_str());
        policy.get_state().registers.ip = SymbolicSemantics::ValueType<32>(x86_insn->get_address());
        semantics.processInstruction(x86_insn);

        // Report and then reset the per-instruction solver statistics.
        rose::BinaryAnalysis::SMTSolver::Stats solver_stats = yices.get_stats();
        msg->mesg("%s: mem-cell list size: %zu elements\n", name, policy.get_state().memory.cell_list.size());
        msg->mesg("%s: SMT stats: ncalls=%zu, input=%zu bytes, output=%zu bytes\n",
                  name, solver_stats.ncalls, solver_stats.input_size, solver_stats.output_size);
        yices.reset_stats();

#if 0
        std::ostringstream ss; ss <<policy;
        msg->mesg("%s", ss.str().c_str());
#endif
        return enabled;
    }
Пример #3
0
 // Per-instruction callback.  The first time the instruction at address `when` is reached the analysis is armed and
 // initialize_register_intervals() is called.  From then on every x86 instruction is traced and fed to `semantics`,
 // after which the whole policy is printed to the trace.  The `enabled` argument is always returned unchanged.
 virtual bool operator()(bool enabled, const Args &args) /*overrides*/ {
     if (!enabled)
         return enabled;

     // One-shot trigger.
     if (!triggered && args.insn->get_address()==when) {
         triggered = true;
         initialize_register_intervals(args.thread);
     }

     // Nothing more to do until triggered, or if this is not an x86 instruction.
     SgAsmX86Instruction *x86_insn = isSgAsmX86Instruction(args.insn);
     if (!triggered || !x86_insn)
         return enabled;

     RTS_Message *msg = args.thread->tracing(TRACE_MISC);
     msg->mesg("%s: %s", name, unparseInstructionWithAddress(x86_insn).c_str());
     semantics.processInstruction(x86_insn);

     // Show the policy as it stands after this instruction.
     std::ostringstream policy_text;
     policy_text <<policy;
     msg->mesg("%s", policy_text.str().c_str());
     return enabled;
 }
Пример #4
0
/** Main driving function for clone detection.  This is the class that chooses inputs, runs each function, and looks at the
 *  outputs to decide how to partition the functions.  It does this repeatedly in order to build a PartitionForest. The
 *  analyze() method is the main entry point. */
class CloneDetector {
protected:
    static const char *name;            /**< For --debug output. */
    RSIM_Thread *thread;                /**< Thread where analysis is running. */
    PartitionForest partition;          /**< Partitioning of functions into similarity sets. */
    enum { MAX_ITERATIONS = 10 };       /**< Maximum number of times we run the functions; max number of input sets. */
    enum { MAX_SIMSET_SIZE = 3 };       /**< Any similarity set containing more than this many functions will be partitioned. */

public:
    // Construct a detector bound to the given simulator thread; the pointer is stored in the `thread` member.
    CloneDetector(RSIM_Thread *thread)
        : thread(thread) {
    }

    // Map one anonymous, read/write, 4096-byte page into the address space of this thread's process and return the
    // address reported by mem_map().  The `hint` argument is forwarded to mem_map() unchanged.  An assertion rejects
    // return values in the range [-256,-1] (when viewed as a signed 64-bit integer), which are treated as error numbers.
    rose_addr_t allocate_page(rose_addr_t hint=0) {
        const rose_addr_t page_va = thread->get_process()->mem_map(hint, 4096, MemoryMap::MM_PROT_RW, MAP_ANONYMOUS, 0, -1);
        assert(!((int64_t)page_va<0 && (int64_t)page_va>=-256)); // disallow error numbers
        return page_va;
    }

    // Build the memory map that the disassembler should use.  The result is a shallow copy of the process memory map,
    // allocated with `new` and returned to the caller as a raw pointer.
    MemoryMap *disassembly_map(RSIM_Process *proc) {
        // Visitor that strips execute permission from every segment whose debug name does not contain the name of the
        // executable.  When comparing two different executables for clones, we probably don't need to compare code that
        // came from dynamically linked libraries since they will be identical in both executables.
        struct Pruner: MemoryMap::Visitor {
            std::string exename;
            Pruner(const std::string &exename): exename(exename) {}
            virtual bool operator()(const MemoryMap*, const Extent&, const MemoryMap::Segment &segment_) {
                // The visitor interface hands us a const segment, but we need to adjust its permissions.
                MemoryMap::Segment &seg = const_cast<MemoryMap::Segment&>(segment_);
                const bool from_executable = seg.get_name().find(exename)!=std::string::npos;
                if (!from_executable) {
                    unsigned perms = seg.get_mapperms();
                    perms &= ~MemoryMap::MM_PROT_EXEC;
                    seg.set_mapperms(perms);
                }
                return true;
            }
        };

        MemoryMap *result = new MemoryMap(proc->get_memory(), MemoryMap::COPY_SHALLOW);
        result->prune(MemoryMap::MM_PROT_READ); // don't let the disassembler read unreadable memory, else it will segfault

        Pruner strip_non_exe(proc->get_exename());
        result->traverse(strip_non_exe);
        return result;
    }

    // Find every function in the process image.  The memory map from disassembly_map() is dumped to the message
    // stream and handed to the process's disassembler; the resulting AST is then searched for SgAsmFunction nodes, which
    // are returned as a Functions container.  The temporary memory map is deleted once disassembly has finished.
    Functions find_functions(RTS_Message *m, RSIM_Process *proc) {
        m->mesg("%s triggered; disassembling entire specimen image...\n", name);

        MemoryMap *dis_map = disassembly_map(proc);
        {
            std::ostringstream map_text;
            dis_map->dump(map_text, "  ");
            m->mesg("%s: using this memory map for disassembly:\n%s", name, map_text.str().c_str());
        }

        SgAsmBlock *top_block = proc->disassemble(false/*take no shortcuts*/, dis_map);
        delete dis_map;
        dis_map = NULL;

        std::vector<SgAsmFunction*> found = SageInterface::querySubTree<SgAsmFunction>(top_block);
#if 0 /*DEBUGGING [Robb P. Matzke 2013-02-12]*/
        // Prune the function list to contain only what we want.
        for (std::vector<SgAsmFunction*>::iterator fi=found.begin(); fi!=found.end(); ++fi) {
            if ((*fi)->get_name().compare("_Z1fRi")!=0)
                *fi = NULL;
        }
        found.erase(std::remove(found.begin(), found.end(), (SgAsmFunction*)NULL), found.end());
#endif
        return Functions(found.begin(), found.end());
    }

    // Perform a pointer-detection analysis on each function. We'll need the results in order to determine whether a function
    // input should consume a pointer or a non-pointer from the input value set.
    //
    // One PointerDetector is created per function, with the stack and frame pointer registers of its initial state set to
    // the policy's INITIAL_STACK value.  The detector is analyzed and then copied into the returned map, keyed by function.
    // When the message object has an open file, the pointers that were found are also listed in the trace.
    typedef std::map<SgAsmFunction*, CloneDetection::PointerDetector> PointerDetectors;
    PointerDetectors detect_pointers(RTS_Message *m, RSIM_Thread *thread, const Functions &functions) {
        // Choose an SMT solver. This is completely optional.  Pointer detection still seems to work fairly well (and much,
        // much faster) without an SMT solver.
        SMTSolver *solver = NULL;
#if 0   // optional code
        if (YicesSolver::available_linkage())
            solver = new YicesSolver;
#endif
        PointerDetectors retval;

        // NOTE(review): the instruction providor is never deleted in this function.  Every detector is constructed with
        // this pointer and copies of the detectors are returned, so it presumably has to outlive them -- confirm ownership.
        CloneDetection::InstructionProvidor *insn_providor = new CloneDetection::InstructionProvidor(thread->get_process());
        for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
            // A space is required between the string literal and PRIx64; otherwise C++11 and later parse the macro name as
            // a user-defined-literal suffix.
            m->mesg("%s: performing pointer detection analysis for \"%s\" at 0x%08" PRIx64,
                    name, (*fi)->get_name().c_str(), (*fi)->get_entry_va());
            CloneDetection::PointerDetector pd(insn_providor, solver);
            pd.initial_state().registers.gpr[x86_gpr_sp] = SYMBOLIC_VALUE<32>(thread->policy.INITIAL_STACK);
            pd.initial_state().registers.gpr[x86_gpr_bp] = SYMBOLIC_VALUE<32>(thread->policy.INITIAL_STACK);
            //pd.set_debug(stderr);
            pd.analyze(*fi);
            retval.insert(std::make_pair(*fi, pd));
#if 1 /*DEBUGGING [Robb P. Matzke 2013-01-24]*/
            if (m->get_file()) {
                // Bind by const reference: no copy if get_pointers() returns a reference, and a lifetime-extended
                // temporary if it returns by value.
                const CloneDetection::PointerDetector::Pointers &plist = pd.get_pointers();
                for (CloneDetection::PointerDetector::Pointers::const_iterator pi=plist.begin(); pi!=plist.end(); ++pi) {
                    std::ostringstream ss;
                    if (pi->type & BinaryAnalysis::PointerAnalysis::DATA_PTR)
                        ss <<"data ";
                    if (pi->type & BinaryAnalysis::PointerAnalysis::CODE_PTR)
                        ss <<"code ";
                    ss <<"pointer at " <<pi->address;
                    m->mesg("   %s", ss.str().c_str());
                }
            }
#endif
        }
        return retval;
    }

    // Build a random set of input values containing `nintegers` non-pointers followed by `npointers` pointers.  Each
    // integer is rand() reduced modulo 256.  Each pointer is non-null with probability 1/3; a non-null pointer is the
    // address of a page freshly obtained from allocate_page(), so only the null/non-null choice is random, not the
    // address itself.
    InputValues choose_inputs(size_t nintegers, size_t npointers) {
        const unsigned integer_modulus = 256;   // arbitrary upper bound (exclusive) for integer inputs
        const unsigned nonnull_denom = 3;       // probability of a non-null pointer is 1/N
        CloneDetection::InputValues chosen;

        size_t remaining = nintegers;
        while (remaining-- > 0)
            chosen.add_integer(rand() % integer_modulus);

        remaining = npointers;
        while (remaining-- > 0) {
            rose_addr_t ptr = 0;
            if (0 == rand()%nonnull_denom)
                ptr = allocate_page();
            chosen.add_pointer(ptr);
        }
        return chosen;
    }

    // Fuzz-test one function with the given inputs and pointer information, then insert the function into `partition`
    // under `parent` according to the output values that were collected.
    void insert_function(SgAsmFunction *func, InputValues &inputs, CloneDetection::PointerDetector &pointers,
                         PartitionForest &partition, PartitionForest::Vertex *parent) {
        CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *test_result = fuzz_test(func, inputs, pointers);
        OutputValues observed = test_result->get_values();
        partition.insert(func, observed, parent);
    }

    // Analyze a single function by running it with the specified inputs and collecting its outputs.
    //
    // The thread's registers are saved and a memory transaction is started before the run; both are restored/rolled back
    // after the outputs have been gathered.  Disassembler failures, instruction-limit violations and HLT instructions
    // terminate the run of the function but not the analysis.  Returns the pointer obtained from the policy's
    // get_outputs().
    //
    // NOTE(review): any other exception type escaping thread->main() propagates to the caller without the registers
    // being restored or the memory transaction being rolled back -- confirm that this is intended.
    CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *fuzz_test(SgAsmFunction *function, CloneDetection::InputValues &inputs,
                                                             const CloneDetection::PointerDetector &pointers) {
        RSIM_Process *proc = thread->get_process();
        RTS_Message *m = thread->tracing(TRACE_MISC);
        m->mesg("==========================================================================================");
        // Whitespace between a string literal and a PRI* macro is required by C++11 and later; without it the macro name
        // is parsed as a user-defined-literal suffix.
        m->mesg("%s: fuzz testing function \"%s\" at 0x%08" PRIx64, name, function->get_name().c_str(), function->get_entry_va());

        // Not sure if saving/restoring memory state is necessary. I don't think machine memory is adjusted by the semantic
        // policy's writeMemory() or readMemory() operations after the policy is triggered to enable our analysis.  But it
        // shouldn't hurt to save/restore anyway, and it's fast. [Robb Matzke 2013-01-14]
        proc->mem_transaction_start(name);
        pt_regs_32 saved_regs = thread->get_regs();

        // Trigger the analysis, resetting it to start executing the specified function using the input values and pointer
        // variable addresses we selected previously.
        thread->policy.trigger(function->get_entry_va(), &inputs, &pointers);

        // "Run" the function using our semantic policy.  The function will not "run" in the normal sense: since our
        // policy has been triggered, memory access, function calls, system calls, etc. will all operate differently.  See
        // CloneDetectionSemantics.h and CloneDetectionTpl.h for details.
        try {
            thread->main();
        } catch (const Disassembler::Exception &e) {
            // Probably due to the analyzed function's RET instruction, but could be from other things as well. In any case, we
            // stop analyzing the function when this happens.
            m->mesg("%s: function disassembly failed at 0x%08" PRIx64 ": %s", name, e.ip, e.mesg.c_str());
        } catch (const CloneDetection::InsnLimitException &e) {
            // The analysis might be in an infinite loop, such as when analyzing "void f() { while(1); }"
            m->mesg("%s: %s", name, e.mesg.c_str());
        } catch (const RSIM_Semantics::InnerPolicy<>::Halt &e) {
            // The x86 HLT instruction appears in some functions (like _start) as a failsafe to terminate a process.  We need
            // to intercept it and terminate only the function analysis.
            m->mesg("%s: function executed HLT instruction at 0x%08" PRIx64, name, e.ip);
        }

        // Gather the function's outputs before restoring machine state.
        bool verbose = true;
        CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *outputs = thread->policy.get_outputs(verbose);
        thread->init_regs(saved_regs);
        proc->mem_transaction_rollback(name);
        return outputs;
    }
Пример #5
0
 // Per-instruction callback.  The first time the instruction at address `when` is reached (and the callback is
 // enabled), start a memory transaction named "MemoryTransactionTester" on the thread's process, showing the memory
 // map before and after.  Fires at most once; `enabled` is always returned unchanged.
 virtual bool operator()(bool enabled, const Args &args) {
     const bool fire_now = enabled && !triggered && args.insn->get_address()==when;
     if (!fire_now)
         return enabled;

     triggered = true;
     RTS_Message *msg = args.thread->tracing(TRACE_MISC);
     msg->mesg("MemoryTransactionTester: triggered\n");

     RSIM_Process *process = args.thread->get_process();
     process->mem_showmap(msg, "before starting transaction:\n");
     process->mem_transaction_start("MemoryTransactionTester");
     process->mem_showmap(msg, "after starting transaction:\n");
     return enabled;
 }
Пример #6
0
 // Memory-access callback that services reads of `table` itself.
 //
 // The callback fires only for an enabled, READABLE, 4-byte access whose address lies inside the table (starting at
 // `table_va`) and is aligned to a 4-byte entry.  When it fires, the access is traced, the entry's counter in `seen`
 // is incremented, the entry is copied into the caller's buffer, `nbytes_xfer` is set to 4, and false is returned.
 // In every other case `enabled` is returned unchanged and nothing else happens.
 virtual bool operator()(bool enabled, const Args &args) {
     // Trigger only if we're reading a table entry
     if (!enabled || args.how!=MemoryMap::READABLE || 4!=args.nbytes)
         return enabled;
     if (args.va<table_va || args.va>=table_va+sizeof(table) || 0!=(args.va-table_va)%4)
         return enabled;

     size_t idx = (args.va-table_va)/4;
     // A space is required between the string literal and PRIx32; otherwise C++11 and later parse the macro name as a
     // user-defined-literal suffix.
     trace->mesg("CrcTable: read entry %zu = 0x%08" PRIx32, idx, table[idx]);
     seen[idx] += 1;
     memcpy(args.buffer, table+idx, 4);
     args.nbytes_xfer = 4;
     return false;
 }
Пример #7
0
 // Per-instruction callback.  When the instruction at `trigger_va` is reached (and the callback is enabled), the
 // simulator is deactivated, the process is fully disassembled, and the result is unparsed to standard output using
 // the organization stored in `org`.  The callback then throws `this` to terminate the specimen, so it does not
 // return in that case.  Otherwise `enabled` is returned unchanged.
 virtual bool operator()(bool enabled, const Args &args) {
     if (!enabled || args.insn->get_address()!=trigger_va)
         return enabled;

     args.thread->get_process()->get_simulator()->deactivate();
     RTS_Message *msg = args.thread->tracing(TRACE_MISC);
     msg->mesg("disassembly triggered; disassembling now...\n");

     // full disassembly with partitioning
     SgAsmBlock *top_block = args.thread->get_process()->disassemble(false);

     AsmUnparser printer;
     printer.set_organization(org);
     printer.unparse(std::cout, top_block);
     throw this; // to terminate specimen
 }
Пример #8
0
    // The actual analysis, triggered when we reach the specified execution address...
    //
    // When the instruction at `trigger_addr` is reached (and the callback is enabled), the function at `analysis_addr`
    // is executed symbolically with a concrete buffer address and size as its two stack arguments.  The symbolic value
    // of EAX is then reported, evaluated with every variable set to 'x', and finally solved in the other direction for
    // a fixed result value.  In the triggered case the callback throws `this` instead of returning; otherwise `enabled`
    // is returned unchanged.
    virtual bool operator()(bool enabled, const Args &args) {
        using namespace rose::BinaryAnalysis::InstructionSemantics;

        if (enabled && args.insn->get_address()==trigger_addr) {
            RTS_Message *trace = args.thread->tracing(TRACE_MISC);
            // Whitespace between a string literal and a PRI* macro is required by C++11 and later; without it the macro
            // name is parsed as a user-defined-literal suffix.
            trace->mesg("Analysis triggered: analyzing function at 0x%08" PRIx64, analysis_addr);

            // An SMT solver is necessary for this example to work correctly. ROSE uses the SMT solver to try to figure out
            // when memory address expressions might be aliases.  Since we're initializing some memory (the function argument)
            // using an address expression that we build here, ROSE needs to be able to figure out when the program also tries
            // to access the same memory but using an address expression that is generated by the analysis itself.  ROSE should
            // have been configured with "--with-yices=/full/path/to/yices/installation".  If not, you'll get a failed
            // assertion when ROSE tries to use the solver.
            rose::BinaryAnalysis::YicesSolver smt_solver;
            smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE);
            //smt_solver.set_debug(stdout);

            // We deactivate the simulator while we're doing this analysis.  If the simulator remains activated, then the
            // SIGCHLD that are generated from running the Yices executable will be sent to the specimen.  That probably
            // wouldn't cause problems for the specimen, but the messages are annoying.
            args.thread->get_process()->get_simulator()->deactivate();

            // Create the policy that holds the analysis state which is modified by each instruction.  Then plug the policy
            // into the X86InstructionSemantics to which we'll feed each instruction.
            SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver);
            X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>,
                                    SymbolicSemantics::ValueType> semantics(policy);

            // The top of the stack contains the (unknown) return address.  The value above that (in memory) is the address of
            // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a
            // concrete value.  The contents of the buffer are unknown.  Process memory is maintained by the policy we created
            // above, so none of these memory writes are actually affecting the specimen's state in the simulator.
            policy.writeRegister("esp", policy.number<32>(4000));
            SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4));
            SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4));
            policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_());   // ptr to buffer
            policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_());       // bytes in buffer

            // Run the analysis until we can't figure out what instruction is next.  If we set things up correctly, the
            // simulation will stop when we hit the RET instruction to return from this function.
            policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr));
            while (policy.readRegister<32>("eip").is_known()) {
                uint64_t va = policy.readRegister<32>("eip").known_value();
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va));
                assert(insn!=NULL);
                //std::cout <<policy <<unparseInstructionWithAddress(insn) <<"\n";
                semantics.processInstruction(insn);
            }

            // Show the value of the EAX register since this is where GCC puts the function's return value.  If we did things
            // right, the return value should depend only on the unknown bytes from the beginning of the buffer.
            SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax");
            std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables();
            {
                std::ostringstream s;
                s <<"Analysis: symbolic return value is " <<result <<"\n"
                  <<"Analysis: return value has " <<vars.size() <<" variables:";
                for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin();
                     vi!=vars.end(); ++vi)
                    s <<" " <<*vi;
                trace->mesg("%s", s.str().c_str());
            }

            // Now give values to those two bytes and solve the equation for the result using an SMT solver.
            if (!result.is_known()) {
                trace->mesg("Analysis: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...");
                using namespace rose::BinaryAnalysis::InsnSemanticsExpr;
                std::vector<TreeNodePtr> exprs;
                LeafNodePtr result_var = LeafNode::create_variable(32);
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var);
                exprs.push_back(expr);
                for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                    expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x'));
                    exprs.push_back(expr);
                }
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES==smt_solver.satisfiable(exprs)) {
                    // NOTE(review): evidence_for_variable() is dereferenced unconditionally here -- confirm that it
                    // cannot return a null pointer for a variable that appears in a satisfied expression.
                    LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode();
                    if (!result_value) {
                        trace->mesg("Analysis: evaluation result could not be determined. ERROR!");
                    } else if (!result_value->is_known()) {
                        trace->mesg("Analysis: evaluation result is not constant. ERROR!");
                    } else {
                        trace->mesg("Analysis: evaluation result is 0x%08" PRIx64, result_value->get_value());
                    }
                } else {
                    trace->mesg("Analysis: expression is not satisfiable (or unknown). ERROR!");
                }
            }

            // Now try going the other direction.  Set the return expression to a value and try to discover what two bytes
            // would satisfy the equation.
            if (!result.is_known()) {
                trace->mesg("Analysis: setting result equal to 0xff015e7c and trying to find inputs...");
                using namespace rose::BinaryAnalysis::InsnSemanticsExpr;
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(),
                                                        LeafNode::create_integer(32, 0xff015e7c));
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(expr)) {
                    for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                        LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode();
                        if (var_val && var_val->is_known()) {
                            // isprint() is only defined for values representable as unsigned char (and EOF), so check
                            // the range before calling it.
                            const uint64_t value = var_val->get_value();
                            const char shown = (value<256 && isprint((int)value)) ? (char)value : ' ';
                            trace->mesg("Analysis:   v%" PRIu64 " = %" PRIu64 " %c",
                                        (*vi)->get_name(), value, shown);
                        }
                    }
                } else {
                    trace->mesg("Analysis:   expression is not satisfiable (or unknown).  No solutions.");
                }
            }

            // Reactivate the simulator in case we want to continue simulating.
            args.thread->get_process()->get_simulator()->activate();
            throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator
        }
        return enabled;
    }
Пример #9
0
    // Detect functions that are semantically similar by running multiple iterations of partition_functions().
    void analyze() {
        RTS_Message *m = thread->tracing(TRACE_MISC);
        Functions functions = find_functions(m, thread->get_process());
        PointerDetectors pointers = detect_pointers(m, thread, functions);
        PartitionForest partition;
        while (partition.nlevels()<MAX_ITERATIONS) {
            InputValues inputs = choose_inputs(3, 3);
            size_t level = partition.new_level(inputs);
            m->mesg("####################################################################################################");
            m->mesg("%s: fuzz testing %zu function%s at level %zu", name, functions.size(), 1==functions.size()?"":"s", level);
            m->mesg("%s: using these input values:\n%s", name, inputs.toString().c_str());

            if (0==level) {
                partition_functions(m, partition, functions, pointers, inputs, NULL);
            } else {
                const PartitionForest::Vertices &parent_vertices = partition.vertices_at_level(level-1);
                for (PartitionForest::Vertices::const_iterator pvi=parent_vertices.begin(); pvi!=parent_vertices.end(); ++pvi) {
                    PartitionForest::Vertex *parent_vertex = *pvi;
                    if (parent_vertex->functions.size()>MAX_SIMSET_SIZE)
                        partition_functions(m, partition, parent_vertex->functions, pointers, inputs, parent_vertex);
                }
            }

            // If the new level doesn't contain any vertices then we must not have needed to repartition anything and we're all
            // done.
            if (partition.vertices_at_level(level).empty())
                break;
        }

        m->mesg("==========================================================================================");
        m->mesg("%s: The entire partition forest follows...", name);
        m->mesg("%s", StringUtility::prefixLines(partition.toString(), std::string(name)+": ").c_str());

        m->mesg("==========================================================================================");
        m->mesg("%s: Final function similarity sets are:", name);
        PartitionForest::Vertices leaves = partition.get_leaves();
        size_t setno=0;
        for (PartitionForest::Vertices::iterator vi=leaves.begin(); vi!=leaves.end(); ++vi, ++setno) {
            PartitionForest::Vertex *leaf = *vi;
            const Functions &functions = leaf->get_functions();
            m->mesg("%s:   set #%zu at level %zu has %zu function%s:",
                    name, setno, leaf->get_level(), functions.size(), 1==functions.size()?"":"s");
            for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi)
                m->mesg("%s:     0x%08"PRIx64" <%s>", name, (*fi)->get_entry_va(), (*fi)->get_name().c_str());
        }

        m->mesg("%s: dumping final similarity sets to clones.sql", name);
        partition.dump("clones.sql", "NO_USER", "NO_PASSWD");
    }
Пример #10
0
    // The actual analysis, triggered when we reach the specified execution address...
    virtual bool operator()(bool enabled, const Args &args) try {
        using namespace rose::BinaryAnalysis::InstructionSemantics;

        static const char *name = "Analysis";
        using namespace rose::BinaryAnalysis::InsnSemanticsExpr;
        if (enabled && args.insn->get_address()==trigger_addr) {
            RTS_Message *trace = args.thread->tracing(TRACE_MISC);
            trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr);

            // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with
            // "--with-yices=/full/path/to/yices/installation".  If not, you'll get a failed assertion when ROSE tries to use
            // the solver.
            rose::BinaryAnalysis::YicesSolver smt_solver;
            smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE);
            //smt_solver.set_debug(stdout);

            // We deactive the simulator while we're doing this analysis.  If the simulator remains activated, then the SIGCHLD
            // that are generated from running the Yices executable will be sent to the specimen.  That probably wouldn't cause
            // problems for the specimen, but the messages are annoying.
            args.thread->get_process()->get_simulator()->deactivate();

            // Create the policy that holds the analysis state which is modified by each instruction.  Then plug the policy
            // into the X86InstructionSemantics to which we'll feed each instruction.
            SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver);
            X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>,
                                    SymbolicSemantics::ValueType> semantics(policy);

            // The top of the stack contains the (unknown) return address.  The value above that (in memory) is the address of
            // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a
            // concrete value).  The contents of the buffer are unknown.  Process memory is maintained by the policy we created
            // above, so none of these memory writes are actually affecting the specimen's state in the simulator.
            policy.writeRegister("esp", policy.number<32>(4000));
            SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4));
            SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4));
            policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_());   // ptr to buffer
            policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_());       // bytes in buffer
            policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr));            // branch to analysis address

#if 1
            {
                // This is a kludge.  If the first instruction is an indirect JMP then assume we're executing through a dynamic
                // linker thunk and execute the instruction concretely to advance the instruction pointer.
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(analysis_addr));
                if (x86_jmp==insn->get_kind()) {
                    PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> p;
                    X86InstructionSemantics<PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State,
                                                                             PartialSymbolicSemantics::ValueType>,
                                            PartialSymbolicSemantics::ValueType> sem(p);
                    MemoryMap p_map = args.thread->get_process()->get_memory();
                    BOOST_FOREACH (MemoryMap::Segment &segment, p_map.segments())
                        segment.buffer()->copyOnWrite(true);
                    p.set_map(&p_map); // won't be thread safe
                    sem.processInstruction(insn);
                    policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value()));
                    trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64,
                                name, analysis_addr, p.readRegister<32>("eip").known_value());
                }
            }
#endif

            // Run the analysis until we can't figure out what instruction is next.  If we set things up correctly, the
            // simulation will stop when we hit the RET instruction to return from this function.
            size_t nbranches = 0;
            std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver
            while (policy.readRegister<32>("eip").is_known()) {
                uint64_t va = policy.readRegister<32>("eip").known_value();
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va));
                assert(insn!=NULL);
                trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str());
                semantics.processInstruction(insn);
                if (policy.readRegister<32>("eip").is_known())
                    continue;
                
                bool complete;
                std::set<rose_addr_t> succs = insn->getSuccessors(&complete);
                if (complete && 2==succs.size()) {
                    if (nbranches>=take_branch.size()) {
                        std::ostringstream s; s<<policy.readRegister<32>("eip");
                        trace->mesg("%s: EIP = %s", name, s.str().c_str());
                        trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name);
                        throw this;
                    }

                    // Decide whether we should take the branch or not.
                    bool take = take_branch[nbranches++];
                    rose_addr_t target = 0;
                    for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) {
                        if ((take && *si!=insn->get_address()+insn->get_size()) ||
                            (!take && *si==insn->get_address()+insn->get_size()))
                            target = *si;
                    }
                    assert(target!=0);
                    trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target);

                    // Is this path feasible?  We don't really need to check it now; we could wait until the end.
                    TreeNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(),
                                                         LeafNode::create_integer(32, target));
                    constraints.push_back(c); // shouldn't really have to do this again if we could save some state
                    if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(constraints)) {
                        policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target));
                    } else {
                        trace->mesg("%s: chosen control flow path is not feasible (or unknown).", name);
                        break;
                    }
                }
            }

            // Show the value of the EAX register since this is where GCC puts the function's return value.  If we did things
            // right, the return value should depend only on the unknown bytes from the beginning of the buffer.
            SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax");
            std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables();
            {
                std::ostringstream s;
                s <<name <<": symbolic return value is " <<result <<"\n"
                  <<name <<": return value has " <<vars.size() <<" variables:";
                for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin();
                     vi!=vars.end(); ++vi)
                    s <<" " <<*vi;
                s <<"\n";
                if (!constraints.empty()) {
                    s <<name <<": path constraints:\n";
                    for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci)
                        s <<name <<":   " <<*ci <<"\n";
                }
                trace->mesg("%s", s.str().c_str());
            }

            // Now give values to those bytes and solve the equation for the result using an SMT solver.
            if (!result.is_known()) {
                trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                LeafNodePtr result_var = LeafNode::create_variable(32);
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var);
                exprs.push_back(expr);
                for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                    expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x'));
                    exprs.push_back(expr);
                }
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) {
                    LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode();
                    if (!result_value) {
                        trace->mesg("%s: evaluation result could not be determined. ERROR!", name);
                    } else if (!result_value->is_known()) {
                        trace->mesg("%s: evaluation result is not constant. ERROR!", name);
                    } else {
                        trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value());
                    }
                } else {
                    trace->mesg("%s: expression is not satisfiable. (or unknown)", name);
                }
            }

            // Now try going the other direction.  Set the return expression to a value and try to discover what two bytes
            // would satisfy the equation.
            if (!result.is_known()) {
                trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(),
                                                        LeafNode::create_integer(32, 0xff015e7c));
                exprs.push_back(expr);
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) {
                    for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                        LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode();
                        if (var_val && var_val->is_known())
                            trace->mesg("%s:   v%"PRIu64" = %"PRIu64" %c",
                                        name, (*vi)->get_name(), var_val->get_value(),
                                        isprint(var_val->get_value())?(char)var_val->get_value():' ');
                    }
                } else {
                    trace->mesg("%s:   expression is not satisfiable (or unknown).  No solutions.", name);
                }
            }

            // Reactivate the simulator in case we want to continue simulating.
            args.thread->get_process()->get_simulator()->activate();
            throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator
        }
        return enabled;
    } catch (const Analysis*) {
        args.thread->get_process()->get_simulator()->activate();
        throw;
    }