Example #1
0
/**************************************************************************
 * Main function. This function is run on each node that is being traversed
 * in the graph. For each node, we determine the successors and check
 * if those have been previously seen. If yes, a cycle may exist.
 *
 * name     - unused by this function
 * node     - graph node currently being traversed; must not be NULL
 * previous - unused by this function
 *
 * Always returns false. Candidate cycles are confirmed with
 * checkIfValidCycle(); confirmed ones are stored in cycleFound and, when
 * both endpoints are x86 instructions, reported through output.
 **************************************************************************/
bool 
CompassAnalyses::CycleDetection::Traversal::run(string& name, SgGraphNode* node,
                                                SgGraphNode* previous){
  ROSE_ASSERT(node);

  // A function node starts a fresh search: forget everything visited so far.
  if (isSgAsmFunction(node->get_SgNode())) {
    visited.clear();
    return false;
  }

  successors.clear();
  ROSE_ASSERT(vizzGraph);
  vizzGraph->getSuccessors(node, successors);

  // The instruction behind the current node is the same for every successor.
  SgAsmX86Instruction* nodeSg = isSgAsmX86Instruction(node->get_SgNode());

  for (vector<SgGraphNode*>::iterator succ = successors.begin(); succ != successors.end(); ++succ) {
    SgGraphNode* next = *succ;

    // Only successors that were already visited can close a cycle.
    if (visited.find(next) == visited.end())
      continue;

    SgAsmX86Instruction* nextSg = isSgAsmX86Instruction(next->get_SgNode());

    // Either cast yields NULL when the node is not an x86 instruction. The
    // textual report needs both instructions, so it is only produced when
    // both are available instead of dereferencing a NULL pointer.
    const bool canDescribe = (nodeSg != NULL && nextSg != NULL);
    std::string endpoints;
    if (canDescribe) {
      endpoints += stringifyX86InstructionKind(nodeSg->get_kind()) + " (";
      endpoints += RoseBin_support::HexToString(nodeSg->get_address()) + ") and ";
      endpoints += stringifyX86InstructionKind(nextSg->get_kind()) + " (";
      endpoints += RoseBin_support::HexToString(nextSg->get_address()) + ")";
    }

    if (debug && canDescribe) {
      std::string outputText = "Found possible cycle between  " + endpoints;
      std::cerr << outputText << std::endl;
      output->addOutput(new CheckerOutput(nodeSg, outputText));
    }

    bool validCycle = checkIfValidCycle(node, next);
    if (validCycle) {
      if (canDescribe) {
        std::string outputText = "Found cycle between  " + endpoints;
        std::cerr << outputText << std::endl;
        output->addOutput(new CheckerOutput(nodeSg, outputText));
      }
      // The cycle is recorded even when it cannot be described textually.
      cycleFound[node] = next;
    } else if (debug) {
      std::cerr << "This is not a cyclic node "  << std::endl;
    }
  }

  visited.insert(node);
  return false;
}
Example #2
0
int
main(int argc, char *argv[])
{
    // Parse command-line
    int argno=1;
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else {
            std::cerr <<argv[0] <<": unrecognized switch: " <<argv[argno] <<"\n";
            exit(1);
        }
    }
    if (argno+1!=argc) {
        std::cerr <<"usage: " <<argv[0] <<" [SWITCHES] [--] SPECIMEN\n";
        exit(1);
    }
    std::string specimen_name = argv[argno++];
            
    // Open the file
    rose_addr_t start_va = 0;
    MemoryMap map;
    size_t file_size = map.insertFile(specimen_name, start_va);
    map.at(start_va).limit(file_size).changeAccess(MemoryMap::EXECUTABLE, 0);

    // Try to disassemble every byte, and print the CALL/FARCALL targets
    size_t ninsns=0, nerrors=0;
    Disassembler *disassembler = new DisassemblerX86(4);
    for (rose_addr_t offset=0; offset<file_size; ++offset) {
        try {
            rose_addr_t insn_va = start_va + offset;
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(disassembler->disassembleOne(&map, insn_va));
            if (insn && (x86_call==insn->get_kind() || x86_farcall==insn->get_kind())) {
                ++ninsns;
                rose_addr_t target_va;
                if (insn->getBranchTarget(&target_va))
                    std::cout <<StringUtility::addrToString(insn_va) <<": " <<StringUtility::addrToString(target_va) <<"\n";
            }
        } catch (const Disassembler::Exception &e) {
            ++nerrors;
        }
    }

    std::cerr <<specimen_name <<": " <<ninsns <<" instructions; " <<nerrors <<" errors\n";
    return 0;
}
Example #3
0
/** Returns a string containing the specified operand.
 *
 *  The x86 instruction owning @p expr is located by walking up the parent chain starting at the expression itself; it is
 *  an assertion failure if none is found. The work is then delegated to the four-argument overload, whose last argument
 *  says whether that instruction is an LEA. */
std::string unparseX86Expression(SgAsmExpression *expr, const AsmUnparser::LabelMap *labels,
                                 const RegisterDictionary *registers) {
    SgAsmX86Instruction *insn = NULL;
    SgNode *node = expr;
    while (node != NULL) {
        insn = isSgAsmX86Instruction(node);
        if (insn != NULL)
            break;
        node = node->get_parent();
    }
    ASSERT_not_null(insn);

    const bool isLea = (x86_lea == insn->get_kind());
    return unparseX86Expression(expr, labels, registers, isLea);
}
// see base class
//
// Looks only at the kind of the last instruction in insns: it is treated as a function call when it is an x86 CALL or
// FARCALL. In that case getBranchTarget() is invoked with target, and return_va (when non-null) receives the address
// immediately following the call instruction.
bool
SgAsmX86Instruction::isFunctionCallFast(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va)
{
    SgAsmX86Instruction *last = insns.empty() ? NULL : isSgAsmX86Instruction(insns.back());
    if (last == NULL)
        return false;

    // Quick method based only on the kind of instruction
    switch (last->get_kind()) {
        case x86_call:
        case x86_farcall:
            break;
        default:
            return false;
    }

    last->getBranchTarget(target);
    if (return_va != NULL)
        *return_va = last->get_address() + last->get_size();
    return true;
}
/**
 * Checks whether the instruction of 'end' can be reached from the instruction
 * of 'start' by repeatedly following cfgBinFlowOutEdge().
 *
 * The walk stops as soon as there is no next x86 instruction, or when a call
 * or ret instruction other than the end instruction is reached.
 *
 * start - graph node where the walk begins; must not be NULL
 * end   - graph node to be reached; must not be NULL
 *
 * Returns true only if the walk actually arrived at the end instruction
 * (including the case where both nodes refer to the same instruction).
 * Returns false if either node does not wrap an x86 instruction.
 *
 * NOTE(review): there is no guard against a flow chain that loops without
 * ever reaching 'end', a call or a ret; confirm that cannot occur here.
 */
bool
RoseBin_DataFlowAnalysis::existsPath(SgGraphNode* start, SgGraphNode* end) {
  ROSE_ASSERT(g_algo->info);
  ROSE_ASSERT(start);
  ROSE_ASSERT(end);

  // The instruction is the SgNode wrapped by the graph node; casting the
  // graph node itself can never yield an instruction.
  SgAsmX86Instruction* next = isSgAsmX86Instruction(start->get_SgNode());
  SgAsmX86Instruction* endAsm = isSgAsmX86Instruction(end->get_SgNode());
  if (next == NULL || endAsm == NULL)
    return false;

  while (next != endAsm) {
    next = isSgAsmX86Instruction(next->cfgBinFlowOutEdge(g_algo->info));
    if (next == NULL)
      return false;                       // ran out of instructions before reaching the end
    if (next != endAsm && (next->get_kind() == x86_call || next->get_kind() == x86_ret))
      return false;                       // do not walk across calls or returns
  }
  return true;
}
Example #6
0
/** Add edges to the graph from functions that make system calls to the system calls they make.
 *
 *  The first 1000 vertices (0 to 999) in the graph are reserved for system calls, which is many more than the actual
 *  number of system calls in Linux.
 *
 *  For every "int 0x80" instruction found in a function, the enclosing basic block is executed semantically up to the
 *  interrupt. If the value of EAX is known at that point, an edge is added from the function's index in @p all_functions
 *  to the vertex numbered by that value. Semantic exceptions are silently ignored. */
void
add_syscall_edges(DirectedGraph* G, std::vector<SgAsmFunction*>& all_functions)
{
    typedef PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State,
                                             PartialSymbolicSemantics::ValueType> Policy;
    typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;

    // Detect all system calls and add an edge from the calling function to the system call
    for (unsigned int caller_id = 0; caller_id < all_functions.size(); ++caller_id) {
        std::vector<SgAsmInstruction*> insns =
            SageInterface::querySubTree<SgAsmInstruction>(all_functions[caller_id]);

        for (size_t i = 0; i < insns.size(); ++i) {
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(insns[i]);
            if (insn == NULL)
                continue;

            // On Linux system calls are always interrupts and all interrupts are system calls
            SgAsmBlock *block = SageInterface::getEnclosingNode<SgAsmBlock>(insn);
            if (block == NULL || insn->get_kind() != x86_int)
                continue;

            // Only "int 0x80" is of interest: exactly one operand, an integer constant equal to 0x80.
            const SgAsmExpressionPtrList &operands = insn->get_operandList()->get_operands();
            if (operands.size() != 1)
                continue;
            SgAsmExpression *expr = operands[0];
            if (expr == NULL || expr->variantT() != V_SgAsmIntegerValueExpression)
                continue;
            if (0x80 != isSgAsmIntegerValueExpression(expr)->get_value())
                continue;

            // Position of the interrupt within its basic block (block size if it is not found).
            const SgAsmStatementPtrList &stmts = block->get_statementList();
            size_t int_n = 0;
            while (int_n < stmts.size() && isSgAsmInstruction(stmts[int_n]) != insn)
                ++int_n;

            // Semantically execute the basic block to find out which system call was called
            Policy policy;
            Semantics semantics(policy);
            try {
                semantics.processBlock(stmts, 0, int_n);
                if (policy.readRegister<32>("eax").is_known()) {
                    int nr = policy.readRegister<32>("eax").known_value();
                    boost::add_edge(caller_id, nr, *G);
                }
            } catch (const Semantics::Exception&) {
            } catch (const Policy::Exception&) {
            }
        }
    }
}
// Constructs the translation policy.
//
// hooks                  - stored in the 'hooks' member
// minNumStepsToFindError - lower bound on the step count at which errors are enabled; must be >= 1
// maxNumStepsToFindError - upper bound on the step count at which errors are enabled; 0 disables the upper bound.
//                          Must be less than 0xFFFFFFFF because maxNumStepsToFindError + 1 is computed below.
// proj                   - project whose AST is searched for functions, basic blocks and x86 instructions
//
// Besides building the step counter and the resetState/errorsEnabled conditions, the constructor fills
// functionStarts, blockStarts, returnPoints and validIPs from the AST of proj.
BtorTranslationPolicy::BtorTranslationPolicy(BtorTranslationHooks* hooks, uint32_t minNumStepsToFindError, uint32_t maxNumStepsToFindError, SgProject* proj): problem(), hooks(hooks), regdict(NULL) {
  assert (minNumStepsToFindError >= 1); // Can't find an error on the first step
  assert (maxNumStepsToFindError < 0xFFFFFFFFU); // Prevent overflows
  assert (minNumStepsToFindError <= maxNumStepsToFindError || maxNumStepsToFindError == 0);
  makeRegMap(origRegisterMap, "");
  makeRegMapZero(newRegisterMap);
  isValidIp = false_();
  validIPs.clear();
  // 32-bit step counter. Its next-state value is incremented each step until it equals
  // maxNumStepsToFindError + 1, after which it keeps that value.
  Comp stepCount = problem.build_var(32, "stepCount_saturating_at_" + boost::lexical_cast<std::string>(maxNumStepsToFindError + 1));
  addNext(stepCount, ite(problem.build_op_eq(stepCount, number<32>(maxNumStepsToFindError + 1)), number<32>(maxNumStepsToFindError + 1), problem.build_op_inc(stepCount)));
  // resetState holds exactly when the step counter is zero.
  resetState = problem.build_op_eq(stepCount, zero(32));
  // errorsEnabled holds when min <= stepCount and, unless max is 0 (no upper bound), stepCount <= max.
  errorsEnabled =
    problem.build_op_and(
      problem.build_op_ugte(stepCount, number<32>(minNumStepsToFindError)),
      (maxNumStepsToFindError == 0 ?
       true_() :
       problem.build_op_ulte(stepCount, number<32>(maxNumStepsToFindError))));
  {
    // Record the address of every function in the project.
    vector<SgNode*> functions = NodeQuery::querySubTree(proj, V_SgAsmFunction);
    for (size_t i = 0; i < functions.size(); ++i) {
      functionStarts.push_back(isSgAsmFunction(functions[i])->get_address());
      // fprintf(stderr, "functionStarts 0x%"PRIx64"\n", isSgAsmFunction(functions[i])->get_address());
    }
  }
  {
    // Record the address of every non-empty block whose first statement is an x86 instruction.
    vector<SgNode*> blocks = NodeQuery::querySubTree(proj, V_SgAsmBlock);
    for (size_t i = 0; i < blocks.size(); ++i) {
      SgAsmBlock* b = isSgAsmBlock(blocks[i]);
      if (!b->get_statementList().empty() && isSgAsmX86Instruction(b->get_statementList().front())) {
        blockStarts.push_back(b->get_address());
        // fprintf(stderr, "blockStarts 0x%"PRIx64"\n", b->get_address());
      }
    }
  }
  {
    // For every x86 CALL, record the address immediately following the instruction's raw bytes.
    vector<SgNode*> calls = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (size_t i = 0; i < calls.size(); ++i) {
      SgAsmX86Instruction* b = isSgAsmX86Instruction(calls[i]);
      if (b->get_kind() != x86_call) continue;
      returnPoints.push_back(b->get_address() + b->get_raw_bytes().size());
      // fprintf(stderr, "returnPoints 0x%"PRIx64"\n", b->get_address() + b->get_raw_bytes().size());
    }
  }
  {
    // Record the address of every x86 instruction as a valid instruction pointer value.
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmX86Instruction* b = isSgAsmX86Instruction(instructions[i]);
      validIPs.push_back(b->get_address());
    }
  }
}
 // For each x86 CALL whose target address is known and maps to an instruction,
 // records the address following the call as a return target of the nearest
 // enclosing block or function of the call's target instruction.
 virtual void visit(SgNode* n) {
     SgAsmX86Instruction* insn = isSgAsmX86Instruction(n);
     if (insn == NULL || insn->get_kind() != x86_call)
         return;

     uint64_t tgtAddr;
     if (!insn->getBranchTarget(&tgtAddr))
         return;

     SgAsmInstruction* tgt = info->getInstructionAtAddress(tgtAddr);
     if (tgt == NULL)
         return;

     // Climb from the target instruction to the first block or function above it.
     SgNode* enclosing = tgt;
     for (; enclosing != NULL; enclosing = enclosing->get_parent()) {
         if (isSgAsmBlock(enclosing) || isSgAsmFunction(enclosing))
             break;
     }
     if (enclosing == NULL)
         return;

     // Address of the instruction that follows the call.
     uint64_t returnAddr = insn->get_address() + insn->get_raw_bytes().size();
     info->returnTargets[isSgAsmStatement(enclosing)].insert(returnAddr);
 }
Example #9
0
// Run natively and return number of instructions executed and reason for termination.
//
// The specimen is started under a debugger. If initVa is set, the child first runs until it reaches that address. The
// execution address is then changed to randomAddress and the child is single-stepped until one of the following happens:
//   - settings.allowSyscalls is false and the instruction at the execution address is missing, unknown, or is an
//     INT or SYSENTER instruction;
//   - the child terminates;
//   - settings.maxInsns is non-zero and that many single steps have been completed.
// The child is terminated before returning. The process exits with status 1 if the child terminates before control
// is gained or before initVa is reached.
static std::pair<size_t, std::string>
runNatively(const Settings &settings, const std::string &specimenName, Sawyer::Optional<rose_addr_t> initVa,
            const P2::Partitioner &partitioner, rose_addr_t randomAddress) {
    Stream debug(mlog[DEBUG]);

    BinaryDebugger debugger(specimenName);
    if (debugger.isTerminated()) {
        mlog[FATAL] <<"child " <<debugger.isAttached() <<" " <<debugger.howTerminated() <<" before we could gain control\n";
        exit(1);
    }

    // Allow child to run until we hit the desired address.
    if (initVa) {
        debugger.setBreakpoint(*initVa);
        debugger.runToBreakpoint();
        debugger.clearBreakpoint(*initVa);
        if (debugger.isTerminated()) {
            mlog[FATAL] <<"child " <<debugger.isAttached() <<" " <<debugger.howTerminated()
                        <<" without reaching " <<addrToString(*initVa) <<"\n";
            exit(1);
        }
    }

    // Show specimen address map so we can verify that the Linux loader used the same addresses we used.
    // We could have shown it earlier, but then we wouldn't have seen the results of dynamic linking.
    if (settings.showMaps) {
        std::cout <<"Linux loader specimen memory map:\n";
        system(("cat /proc/" + numberToString(debugger.isAttached()) + "/maps").c_str());
    }

    // Branch to the starting address
    debug <<"branching to " <<addrToString(randomAddress) <<"\n";
    debugger.executionAddress(randomAddress);

    std::string terminationReason;
    size_t nExecuted = 0;                               // number of instructions executed
    while (1) {
        // Check for and avoid system calls if necessary
        if (!settings.allowSyscalls) {
            rose_addr_t eip = debugger.executionAddress();
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(partitioner.instructionProvider()[eip]);
            if (!insn || insn->isUnknown()) {
                // A non-null insn here is one for which isUnknown() is true; a null insn means there is no
                // instruction at all at this address.
                if (settings.showInsnTrace)
                    std::cout <<"at " <<addrToString(eip) <<": " <<(insn?"unknown":"no") <<" instruction\n";
                terminationReason = "executed at " + addrToString(eip) +" which we don't know about";
                break;
            }
            if (settings.showInsnTrace)
                std::cout <<"at " <<unparseInstructionWithAddress(insn) <<"\n";
            if (insn->get_kind() == x86_int || insn->get_kind() == x86_sysenter) {
                terminationReason = "tried to execute a system call";
                break;
            }
        }

        // Single-step
        if (debug)
            debug <<"single stepping at " <<addrToString(debugger.executionAddress()) <<"\n";
        debugger.singleStep();
        if (debugger.isTerminated()) {
            terminationReason = debugger.howTerminated();
            break;
        }
        ++nExecuted;                                    // only steps that left the child alive are counted
        if (settings.maxInsns!=0 && nExecuted>=settings.maxInsns) {
            terminationReason = "reached instruction limit";
            break;
        }
    }
    debugger.terminate();
    return std::make_pair(nExecuted, terminationReason);
}
/***********************************************************************
 * (10/31/07) tps: Traverses the graph starting at each node in rootNodes
 * and applies analysis->run() to every node that is reached (the analysis
 * can be e.g. def_use, variable detection or emulation).
 *
 * Nodes are processed from a FIFO worklist. A successor that has never
 * been visited is always queued; a successor that was already visited is
 * queued again only if run() on the current node returned true (i.e. the
 * node "has changed"). The traversal of a root node ends when the
 * worklist is empty, which is the fixpoint.
 *
 * rootNodes       - nodes at which the traversal is started, in order
 * analysis        - analysis whose run() is applied to every node
 * interprocedural - if false, edges leaving a call instruction or entering
 *                   a ret instruction are not followed (apart from the
 *                   exceptionCall() special case below); if true, only
 *                   edges accepted by g_algo->isValidCFGEdge() (or leaving
 *                   an exception call) are followed
 *
 * Note: the 'visited' set is not cleared inside this function, so nodes
 * reached from an earlier root node count as visited for later ones.
 ***********************************************************************/
void
RoseBin_DataFlowAnalysis::traverseGraph(vector <SgGraphNode*>& rootNodes,
                                        RoseBin_DataFlowAbstract* analysis,
                                        bool interprocedural){
  if (RoseBin_support::DEBUG_MODE_MIN())
    cerr << " traverseGraph : debug: " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE()) <<
      "  debug_min : " <<  RoseBin_support::resBool(RoseBin_support::DEBUG_MODE_MIN()) << endl;
  // Number of functions traversed
  int funcNr =0;
  // ---------------------------------------------------------------------
  // stores the nodes that still needs to be visited
  //  vector<SgGraphNode*> worklist;
  deque<SgGraphNode*> worklist;
  // mirrors the contents of the worklist so that membership can be tested
  // before a node is queued a second time
  nodeHashSetType worklist_hash;
  // a vector of successors of the current node
  vector<SgGraphNode*> successors;
  // ---------------------------------------------------------------------


  // iterate through all functions
  vector<SgGraphNode*>::iterator it = rootNodes.begin();
  for (; it!=rootNodes.end();++it) {
    // current node
    SgGraphNode* node = *it;

    string func_name = vizzGraph->getProperty(SgGraph::name, node);
    RoseBin_support::checkText(func_name);
    funcNr++;
    if (RoseBin_support::DEBUG_MODE()) {
      cout << "\n\n -----------  dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+
        RoseBin_support::ToString(rootNodes.size())+") : " << func_name <<
        "  visited size : " << visited.size() <<
        "  total visited nodes : " << nrOfNodesVisited << endl;
      // debug
    }
    if (RoseBin_support::DEBUG_MODE_MIN()) {
      cerr << " -----------  dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+
        RoseBin_support::ToString(rootNodes.size())+") : " << func_name <<
        "  visited size : " << visited.size() <<
        "  total visited nodes : " << nrOfNodesVisited <<
        "  def size  : " << analysis->getDefinitionSize() << endl;
    }

    // indicates whether the current value for this node has changed
    bool hasChanged=false;
    // pushback into worklist and visited list
    worklist.push_back(node);
    worklist_hash.insert(node);
    visited.insert(node);
    visitedCounter[node] = 1;
    // NOTE(review): 'pre' is not referenced anywhere else in this function
    vector <SgGraphNode*> pre;
    // while there are still graph nodes in the worklist do

    while (worklist.size()>0) {
      nrOfNodesVisited++;
      // the new node is taken from the front of the worklist (FIFO order)
      //node = worklist.back();
      //worklist.pop_back();
      node = worklist.front();
      worklist.pop_front();

      worklist_hash.erase(node);
      // get the successors of the current node and store in successors vector
      string name = vizzGraph->getProperty(SgGraph::name, node);

      //if (RoseBin_support::DEBUG_MODE_MIN() && node)
      //        if (node->get_SgNode())
      //  cerr << node->get_SgNode()->class_name() << "  " << node << "  " << node->get_name() << endl;

      // debug output only: printed for instructions whose direct parent is a function
      if (RoseBin_support::DEBUG_MODE_MIN() && node) {
        SgAsmInstruction* instr = isSgAsmInstruction(node->get_SgNode());
        if (instr) {
          SgAsmFunction* funcParent = isSgAsmFunction(instr->get_parent());
          if (funcParent) {
            string parent = funcParent->get_name();
            cout << " ---- analysis of node in function : " << parent <<
              "  defs " << analysis->getDefinitionSize() <<
              " visited : " << RoseBin_support::ToString(visitedCounter[node]) << endl;
          }
        }
      }


      if (RoseBin_support::DEBUG_MODE())
        cout << "\n evaluating: " << name << endl;
      // do something with the current node
      // e.g. checkVariables(name, node);
      // look up the node from which the current node was first reached, if any
      // (this local 'it' shadows the rootNodes iterator of the outer loop)
      SgGraphNode* nodeBefore= NULL;
      BeforeMapType::const_iterator it =
        nodeBeforeMap.find(node);
      if (it!=nodeBeforeMap.end())
        nodeBefore = it->second;
      // successor vector is empty on each new node
      successors.clear();
      ROSE_ASSERT(isSgIncidenceDirectedGraph(vizzGraph));
      isSgIncidenceDirectedGraph(vizzGraph)->getSuccessors(node, successors);

      // run the analysis on the current node; the result decides below whether
      // already-visited successors are queued again
      hasChanged = analysis->run(name, node, nodeBefore);

      // append the successors to the worklist
      if (RoseBin_support::DEBUG_MODE())
        cout << ">> getting successors  (" << successors.size() << ") for : " << name << endl;
      //        if (successors.size()==0)
      //          cout << "PROBLEM ..................................................... : " << endl;
      vector<SgGraphNode*>::iterator succ = successors.begin();
      for (;succ!=successors.end();++succ) {
        // for each successor do...
        SgGraphNode* next = *succ;
        SgAsmX86Instruction* nodeN = isSgAsmX86Instruction(node->get_SgNode());
        //if (!nodeN) continue;
        SgAsmX86Instruction* nextN = isSgAsmX86Instruction(next->get_SgNode());
        //if (!nextN) continue;

        string name_n = vizzGraph->getProperty(SgGraph::name, next);



        // 'call' marks an edge that leaves a call instruction or enters a ret
        // instruction; such edges are skipped in the intraprocedural case
        bool call = false;
        // exceptionCall() is given the instruction only when it is a call, otherwise 0
        bool exceptionCallNext = false;
        if (nextN)
          exceptionCallNext = exceptionCall(nextN->get_kind() == x86_call ? nextN : 0);
        bool exceptionCallNode = false;
        if (nodeN)
          exceptionCallNode = exceptionCall(nodeN->get_kind() == x86_call ? nodeN : 0);
        if (RoseBin_support::DEBUG_MODE())
          std::cout << " exceptionCallNode : " << exceptionCallNode << " exceptionCallNext : " << exceptionCallNext << endl;
        // if function call is call to malloc we have an exception and follow the call path
        if ((exceptionCallNode && !exceptionCallNext)) {
          // intentionally empty: 'call' stays false so the edge is followed
        } else if (
                   //if (
                   (nodeN && nodeN->get_kind() == x86_call) ||
                   (nextN && nextN->get_kind() == x86_ret) )
          call = true;
        //bool sameParent = analysis->sameParents(node, next);

        // 'validNode' decides whether the edge is followed in the interprocedural case
        bool validNode=false;
        if (g_algo->isValidCFGEdge(next, node) || exceptionCallNode)
          validNode = true;

        // debug ------------------------
        if (RoseBin_support::DEBUG_MODE()) {
          string nodeBeforeStr="";
          if (nodeBefore) nodeBeforeStr= nodeBefore->get_name();
          cout << "  DEBUG : >>>>>>>> previous node " << nodeBeforeStr
               << "      This node : " << name << "  next node : " << name_n
               << "  ** validNode : " << RoseBin_support::resBool(validNode) << endl;
        }


        // ----------------------------------
        // follow the edge only if the selected mode allows it
        if (( interprocedural==false && !call) //
            ||  (interprocedural==true && validNode)) {
          if (visited.find(next)==visited.end()) {
            // if the successor is not yet visited
            // mark as visited and put into worklist
            if (RoseBin_support::DEBUG_MODE())
              cout << " never visited next node before... " << name_n <<
                " interprocedural : " << interprocedural << "  call : " << call << endl;
            if (RoseBin_support::DEBUG_MODE())
              cout << "adding to visited : " << name_n << endl;

            visited.insert(next);
            // remember from which node the successor was first reached
            nodeBeforeMap[next]=node;
            visitedCounter[next]=1;
            vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(1));
            if (!containsHash(worklist_hash,next)) {
              // add next node only if it is not already queued
              if (RoseBin_support::DEBUG_MODE())
                cout << "adding to worklist: " << name_n << endl;
              worklist.push_back(next);
              worklist_hash.insert(next);
            }
          } else {
            // if the successor has been visited, we need to check if it has changed
            // if it has not, we continue, else we need to push it back to the worklist
            int nr = visitedCounter[next];
            if (RoseBin_support::DEBUG_MODE())
              cout << " visited next node before... " << RoseBin_support::ToString(nr) <<
                "  Changed == " << RoseBin_support::resBool(hasChanged) << endl;

            if (hasChanged) {
              // count the additional visit and mirror the counter into the graph property
              visitedCounter[next]=++nr;
              vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(nr));
              if (RoseBin_support::DEBUG_MODE())
                cout << " has changed : " << RoseBin_support::resBool(hasChanged) <<
                  "  -- interprocedural : " << RoseBin_support::resBool(interprocedural) <<
                  "  -- Call : " << RoseBin_support::resBool(call) <<
                  "  ------> new number: " << RoseBin_support::ToString(nr) <<
                  "  -- contained in hash? : " << RoseBin_support::resBool(containsHash(worklist_hash,next)) <<
                  "  ---- nr of Defs: " << RoseBin_support::ToString(analysis->getDefinitionSize()) <<
                  "  ---- nr of Use: " << RoseBin_support::ToString(analysis->getUsageSize())
                     << endl;

              if (interprocedural || (!interprocedural && !call)){ //sameParent)) { //!call && ) {
                if (!containsHash(worklist_hash,next)) {
                  worklist_hash.insert(next);
                  worklist.push_back(next);
                  if (RoseBin_support::DEBUG_MODE())
                    cout << " adding to worklist: " << name_n << endl;
                }
              }
            } else
              if (RoseBin_support::DEBUG_MODE())
                cout << " has NOT changed. " << endl;
            //else we continue with the next node
          }
        }
      } // for
    } // while worklist.size()>0

  } // for rootNodes
}
Example #11
0
/* Analyze a single interpretation a block at a time.
 *
 * All x86 instructions of the interpretation (except those in left-over functions) are collected in a map ordered by
 * address. Starting with the lowest remaining address, instructions are processed one after another by following the
 * instruction pointer; every processed instruction is removed from the map. Each such run of instructions is treated
 * as one "basic block" and gets freshly created operators and a fresh dispatcher. The instruction listing and the
 * resulting state are written to standard output. */
static void
analyze_interp(SgAsmInterpretation *interp)
{
    /* Get the set of all instructions except instructions that are part of left-over blocks. */
    struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmX86Instruction*> {
        void visit(SgNode *node) {
            /* NOTE(review): insn is NULL for nodes that are not x86 instructions; this relies on getEnclosingNode()
             * then yielding no function -- confirm. */
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(node);
            SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn);
            if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
                insert(std::make_pair(insn->get_address(), insn));
        }
    } insns;
    insns.traverse(interp, postorder);

    /* Each iteration handles one block and removes at least one instruction from the map. */
    while (!insns.empty()) {
        std::cout <<"=====================================================================================\n"
                  <<"=== Starting a new basic block                                                    ===\n"
                  <<"=====================================================================================\n";
        /* The block starts at the lowest address that has not been processed yet. */
        AllInstructions::iterator si = insns.begin();
        SgAsmX86Instruction *insn = si->second;
        insns.erase(si);

        BaseSemantics::RiscOperatorsPtr operators = make_ops();
        BaseSemantics::Formatter formatter;
        formatter.set_suppress_initial_values();
        formatter.set_show_latest_writers(do_usedef);
        BaseSemantics::DispatcherPtr dispatcher;
        if (do_trace) {
            // Enable RiscOperators tracing, but turn off a bunch of info that makes comparisons with a known good answer
            // difficult.
            Sawyer::Message::PrefixPtr prefix = Sawyer::Message::Prefix::instance();
            prefix->showProgramName(false);
            prefix->showThreadId(false);
            prefix->showElapsedTime(false);
            prefix->showFacilityName(Sawyer::Message::Prefix::NEVER);
            prefix->showImportance(false);
            Sawyer::Message::UnformattedSinkPtr sink = Sawyer::Message::StreamSink::instance(std::cout);
            sink->prefix(prefix);
            sink->defaultPropertiesNS().useColor = false;
            // The dispatcher is given the tracing operators, which are constructed from the real ones.
            TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators);
            trace->stream().destination(sink);
            trace->stream().enable();
            dispatcher = DispatcherX86::instance(trace, 32);
        } else {
            dispatcher = DispatcherX86::instance(operators, 32);
        }
        operators->set_solver(make_solver());

        // The fpstatus_top register must have a concrete value if we'll use the x86 floating-point stack (e.g., st(0))
        if (const RegisterDescriptor *REG_FPSTATUS_TOP = regdict->lookup("fpstatus_top")) {
            BaseSemantics::SValuePtr st_top = operators->number_(REG_FPSTATUS_TOP->get_nbits(), 0);
            operators->writeRegister(*REG_FPSTATUS_TOP, st_top);
        }

#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        BaseSemantics::SValuePtr orig_esp;
        if (do_test_subst) {
            // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated
            // in the state, which is printed in the output, and thus changes the answer.
            BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large();
            orig_esp = operators->readRegister(*regdict->lookup("esp"));
            std::cout <<"Original state:\n" <<*operators;
        }
#endif

        /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of
         * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already
         * been processed. */
        while (1) {
            /* Analyze current instruction */
            std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n";
            try {
                dispatcher->processInstruction(insn);
#   if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/
                show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output
#   else
                std::cout <<(*operators + formatter);
#   endif
            } catch (const BaseSemantics::Exception &e) {
                /* A semantics failure is printed; the block is then continued from whatever state resulted. */
                std::cout <<e <<"\n";
            }

            /* Never follow CALL instructions */
            if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall)
                break;

            /* Get next instruction of this block */
            BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip"));
            if (!ip->is_number())
                break;
            rose_addr_t next_addr = ip->get_number();
            /* Processed instructions were erased from the map, so a failed lookup also covers "already processed". */
            si = insns.find(next_addr);
            if (si==insns.end()) break;
            insn = si->second;
            insns.erase(si);
        }

        // Test substitution on the symbolic state.
#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        if (do_test_subst) {
            /* Replace the original esp value by the expression (frame_pointer + 4) throughout the state. */
            SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp);
            BaseSemantics::SValuePtr newvar = operators->undefined_(32);
            newvar->set_comment("frame_pointer");
            SymbolicSemantics::SValuePtr to =
                SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4)));
            std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n";
            SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to);
            std::cout <<"Substituted state:\n" <<(*operators+formatter);
        }
#endif
    }
}
Example #12
0
int main(int argc, char** argv) {

  std::string binaryFilename = (argc >= 1 ? argv[argc-1]   : "" );
  std::vector<std::string> newArgv(argv,argv+argc);
  newArgv.push_back("-rose:output");
  newArgv.push_back(binaryFilename+"-binarySemantics.C");

  SgProject* proj = frontend(newArgv);
  
  ROSE_ASSERT (proj);
  SgSourceFile* newFile = isSgSourceFile(proj->get_fileList().front());
  ROSE_ASSERT(newFile != NULL);
  SgGlobal* g = newFile->get_globalScope();
  ROSE_ASSERT (g);

  //I am doing some experimental work to enable functions in the C representation
  //Set this flag to true in order to enable that work
  bool enable_functions = true;
  //Jeremiah did some work to enable a simplification and normalization of the 
  //C representation. Enable this work by setting this flag to true.
  bool enable_normalizations = false;

  vector<SgNode*> asmFiles = NodeQuery::querySubTree(proj, V_SgAsmGenericFile);
  ROSE_ASSERT (asmFiles.size() == 1);



  if( enable_functions == false)
  {
    //Representation of C normalizations withotu functions
    SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration("run", SgTypeVoid::createType(), buildFunctionParameterList(), g);
    appendStatement(decl, g);
    SgBasicBlock* body = decl->get_definition()->get_body();
    //  ROSE_ASSERT(isSgAsmFile(asmFiles[0]));
    //  X86CTranslationPolicy policy(newFile, isSgAsmFile(asmFiles[0]));
    X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0]));
    ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL);

    policy.switchBody = buildBasicBlock();
    removeDeadStores(policy.switchBody,policy);

    SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody);
    ROSE_ASSERT(isSgBasicBlock(sw->get_body()));

    SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw);

    appendStatement(whileStmt, body);
    policy.whileBody = sw;

    X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy);
    //AS FIXME: This query gets noting in the form in the repository. Doing this hack since we only 
    //have one binary file anyways.
    //vector<SgNode*> instructions = NodeQuery::querySubTree(asmFiles[0], V_SgAsmX86Instruction);
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);

    std::cout << "Instruction\n";
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]);
      ROSE_ASSERT (insn);
      try {
          t.processInstruction(insn);
      } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) {
          std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n";
      }
    }


    if ( enable_normalizations == true )
    {
      //Enable normalizations of C representation
      //This is done heuristically where some steps
      //are repeated. It is not clear which order is 
      //the best
      {
        plugInAllConstVarDefs(policy.switchBody,policy) ;
        simplifyAllExpressions(policy.switchBody);
        removeIfConstants(policy.switchBody);
        removeDeadStores(policy.switchBody,policy);
        removeUnusedVariables(policy.switchBody);
      }
      {
        plugInAllConstVarDefs(policy.switchBody,policy) ;
        simplifyAllExpressions(policy.switchBody);
        removeIfConstants(policy.switchBody);
        removeDeadStores(policy.switchBody,policy);
      }
      removeUnusedVariables(policy.switchBody);
    }

  
  }else{ //Experimental changes to introduce functions into the C representation


    //When trying to add function I get that symbols are not defined

    //Iterate over the functions separately
    vector<SgNode*> asmFunctions = NodeQuery::querySubTree(proj, V_SgAsmFunction);

    for(size_t j = 0; j < asmFunctions.size(); j++ )
    {
      SgAsmFunction* binFunc = isSgAsmFunction( asmFunctions[j] );

      // Some functions (probably just one) are generated to hold basic blocks that could not
      // be assigned to a particular function. This happens when the Disassembler is overzealous
      // and the Partitioner cannot statically determine where the block belongs.  The name of
      // one such function is "***uncategorized blocks***".  [matzke 2010-06-29]
      if ((binFunc->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
        continue;

      //Some functions may be unnamed so we need to generate a name for those
      std::string funcName;
      if (binFunc->get_name().size()==0) {
	char addr_str[64];
	sprintf(addr_str, "0x%"PRIx64, binFunc->get_statementList()[0]->get_address());
	funcName = std::string("my_") + addr_str;;
      } else {
	funcName = "my" + binFunc->get_name();
      }

      //Functions can have illegal characters in their name. Need to replace those characters
      for ( int i = 0 ; i < funcName.size(); i++ )
      {
	char& currentCharacter = funcName.at(i);
	if ( currentCharacter == '.' )
	  currentCharacter = '_';
      }


      SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration(funcName, SgTypeVoid::createType(), buildFunctionParameterList(), g);

      appendStatement(decl, g);
      SgBasicBlock* body = decl->get_definition()->get_body();
      X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0]));
      ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL);
      policy.switchBody = buildBasicBlock();
      SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody);
      SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw);
      appendStatement(whileStmt, body);
      policy.whileBody = sw;
      X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy);
      vector<SgNode*> instructions = NodeQuery::querySubTree(binFunc, V_SgAsmX86Instruction);

      for (size_t i = 0; i < instructions.size(); ++i) {
        SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]);
	if( insn->get_kind() == x86_nop )
	  continue;
        ROSE_ASSERT (insn);
        try {
            t.processInstruction(insn);
        } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) {
            std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n";
        }
      }

    }

    //addDirectJumpsToSwitchCases(policy);


  }

  proj->get_fileList().erase(proj->get_fileList().end() - 1); // Remove binary file before calling backend

//  AstTests::runAllTests(proj);

  //Compile the resulting project

  return backend(proj);
}
/**
 * Builds the GML attribute text (label, node type, colour, graphics) for one
 * graph node.
 *
 * The node's property map is scanned first; the recognised properties are
 * stored in locals and in the member flags (nodest_jmp, error, nodest_call,
 * interrupt, checked, dfa_standard, dfa_resolved_func, dfa_unresolved_func),
 * all of which are reset at the start of every call.  The colour is then
 * chosen from the kind of x86 instruction in `internal` and may be overridden
 * by the "checked" and dfa_* flags, in that order.
 *
 * @param node              graph node whose properties are read
 * @param forward_analysis  not used by this function
 * @param internal          AST node behind the graph node; when it is not an
 *                          x86 instruction the node receives the default
 *                          (non-control-transfer) colour
 * @return                  the GML fragment describing the node
 */
std::string
RoseBin_GMLGraph::getInternalNodes(  SgGraphNode* node,
                                     bool forward_analysis, SgAsmNode* internal) {

  // NULL when `internal` is not an x86 instruction; every use below is guarded.
  SgAsmX86Instruction* control = isSgAsmX86Instruction(internal);
  // get the unparser string!
  string eval = "";
  string name="noname";
  string regs = "";

  // specifies that this node has no destination address
  nodest_jmp = false;
  // specifies that there is a node that has a call error (calling itself)
  error =false;
  // specifies a call to a unknown location
  nodest_call = false;
  // specifies where its an int instruction
  interrupt = false;
  // specifies whether a node has been visited (dfa)
  checked = false;

  dfa_standard = false;
  dfa_resolved_func =false;
  dfa_unresolved_func=false;
  string dfa_info="";
  string dfa_variable="";
  string visitedCounter="";

  map < int , string> node_p = node->get_properties();
  map < int , string>::iterator prop = node_p.begin();
  for (; prop!=node_p.end(); ++prop) {
    int addr = prop->first;
    // cerr << " dot : property for addr : " << addr << " and node " << hex_address << endl;
    if (addr==SgGraph::name)
      name = prop->second;
    else if (addr==SgGraph::eval)
      eval = prop->second;
    else if (addr==SgGraph::regs)
      regs = prop->second;
    else if (addr==SgGraph::nodest_jmp)
      nodest_jmp = true;
    else if (addr==SgGraph::itself_call)
      error = true;
    else if (addr==SgGraph::nodest_call)
      nodest_call = true;
    else if (addr==SgGraph::interrupt)
      interrupt = true;
    else if (addr==SgGraph::done)
      checked = true;
    else if (addr==SgGraph::dfa_standard)
      dfa_standard = true;
    else if (addr==SgGraph::dfa_resolved_func) {
      dfa_resolved_func = true;
      dfa_info = prop->second;
    } else if (addr==SgGraph::dfa_unresolved_func) {
      dfa_unresolved_func = true;
      dfa_info = prop->second;
    } else if (addr==SgGraph::dfa_variable) {
      dfa_variable = prop->second;
    } else if (addr==SgGraph::visitedCounter) {
      visitedCounter = prop->second;
    } else {
      cerr << " *************** dotgraph: unknown property found :: " << addr << endl;
    }
  }

  // Pick the base colour (and GML node type) from the instruction kind.
  string add = "";
  string typeNode = "";
  if (control && (control->get_kind() == x86_call || control->get_kind() == x86_ret)) {
    typeNode += " Type_ \"[ 67108864 FUNCTION_NODE ]\" \n";
    if (nodest_call)
      add = " FF9900 ";
    else if (error)
      add = " 3399FF ";
    else
      add = " FFCCFF ";
  } else if (control && control->get_kind() == x86_jmp) {
    typeNode += " Type_ \"[  67108864 FILE_NODE ]\" \n";
    if (nodest_jmp)
      add = " FF0000 ";
    else
      add = " 00FF00 ";
  } else if (control && x86InstructionIsControlTransfer(control)) {
    typeNode += " Type_ \"[  67108864 CLASS_NODE ]\" \n";
    if (control->get_kind() == x86_int)
      add = " 0000FF ";
    else
      add = " 008800 ";
  } else {
    // Not a control transfer, or not an x86 instruction at all.
    add = " FFFF66 ";
  }

  // Analysis-state overrides; later assignments win.
  if (checked)
    add = " 777777 ";

  if (dfa_standard)
    add = " FFFF00 ";
  if (dfa_resolved_func)
    add = " 00FF00 ";
  if (dfa_unresolved_func)
    add = " FF0000 ";

  // cant get the extra register info printed in gml format
  // because multiline is not supported? (tps 10/18/07)
  // The label would otherwise be:
  //   name+" " +regs+eval + "  " +dfa_variable+" "+"vis:"+visitedCounter
  string nodeStr = "   label \"" + name+"\"\n "+typeNode;
  const int length = static_cast<int>(name.length());

  // The predecessor lookup (bin_inst->cfgBinFlowInEdge()) is disabled, so every
  // node is emitted as a "first" node.  With a predecessor available, a node
  // following a RET or HLT was meant to be coloured " 0000FF " as suspicious.
  nodeStr +="   first_ 1 \n";

  nodeStr += "  Node_Color_ " + add + "  \n";
  // The rectangle width is 7 units per character of the label.
  nodeStr += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"rectangle\" fill \"#" + add +  "\"  ]\n";

  return nodeStr;
}
Example #14
0
/**
 * Per-node callback of the interrupt analysis.
 *
 * For every x86 INT instruction the definitions of the ax, bx, cx and dx
 * registers are looked up with getValueForDefinition() and the interrupt is
 * named with getIntCallName().  The result is stored on the graph node:
 *   - more than one candidate value for ax: a "PROBLEM: ..." text listing each
 *     candidate is appended as SgGraph::dfa_unresolved_func;
 *   - otherwise: the interrupt name followed by the three argument type names
 *     is appended as SgGraph::dfa_variable (the name stays "unknown " when no
 *     value for ax was found).
 *
 * @param name      label printed in debug output
 * @param node      graph node being visited (must not be NULL)
 * @param previous  not used by this function
 * @return          always false
 */
bool
CompassAnalyses::BinaryInterruptAnalysis::Traversal::run(string& name, SgGraphNode* node,
                                              SgGraphNode* previous){
  // check known function calls and resolve variables
  ROSE_ASSERT(node);
  vector<uint64_t> val_rax, val_rbx, val_rcx, val_rdx ;
  std::vector<uint64_t> pos_rax, pos_rbx, pos_rcx, pos_rdx;
  // Every position starts at the same sentinel.  The original declaration
  // initialised only fpos_rdx, leaving the other three indeterminate before
  // they were handed to getValueForDefinition() / getIntCallName().
  uint64_t fpos_rax = 0xffffffff;
  uint64_t fpos_rbx = 0xffffffff;
  uint64_t fpos_rcx = 0xffffffff;
  uint64_t fpos_rdx = 0xffffffff;

  SgAsmX86Instruction* asmNode = isSgAsmX86Instruction(node->get_SgNode());
  // Only x86 INT instructions are analysed.
  if (asmNode == NULL || asmNode->get_kind() != x86_int)
    return false;

  // ANALYSIS 1 : INTERRUPT DETECTION -------------------------------------------
  // verify all interrupts and make sure they do what one expects them to do.
  if (RoseBin_support::DEBUG_MODE())
    cout << "    " << name << " : found int call " << endl;

  // need to resolve rax, rbx, rcx, rdx
  // therefore get the definition for each
  getValueForDefinition(val_rax, pos_rax, fpos_rax, node, std::make_pair(x86_regclass_gpr, x86_gpr_ax));
  getValueForDefinition(val_rbx, pos_rbx, fpos_rbx, node, std::make_pair(x86_regclass_gpr, x86_gpr_bx));
  getValueForDefinition(val_rcx, pos_rcx, fpos_rcx, node, std::make_pair(x86_regclass_gpr, x86_gpr_cx));
  getValueForDefinition(val_rdx, pos_rdx, fpos_rdx, node, std::make_pair(x86_regclass_gpr, x86_gpr_dx));

  string int_name = "unknown ";

  DataTypes data_ebx = unknown;
  DataTypes data_ecx = unknown;
  DataTypes data_edx = unknown;

  // More than one candidate value for ax means the interrupt cannot be
  // identified uniquely.
  const bool ambigious_inst = val_rax.size() > 1;
  if (val_rax.size() == 1) {
    uint64_t rax = *(val_rax.begin());
    int_name = getIntCallName(rax, data_ebx, data_ecx, data_edx,
                              val_rbx, val_rcx, val_rdx,
                              pos_rbx, pos_rcx, pos_rdx,
                              fpos_rbx, fpos_rcx, fpos_rdx);
  }

  if (ambigious_inst) {
    // List every candidate together with the name it would resolve to.
    string value = "";
    vector<uint64_t>::iterator it = val_rax.begin();
    for (;it!=val_rax.end();++it) {
      string i_name = getIntCallName(*it, data_ebx, data_ecx, data_edx,
                                     val_rbx, val_rcx, val_rdx,
                                     pos_rbx, pos_rcx, pos_rdx,
                                     fpos_rbx, fpos_rcx, fpos_rdx);
      value +="rAX:"+RoseBin_support::HexToString(*it)+" "+i_name+" ";
      //	  createVariable(fpos_rax, pos_rax, "rax", data_ebx, "rax", 0, val_rax,false);
    }

    //cerr << " DataFlow::VariableAnalysis . Ambigious INT call: " <<
    //  vizzGraph->getProperty(SgGraph::name, node) << " - " << value << endl;
    value = "PROBLEM: " + value;
    node->append_properties(SgGraph::dfa_unresolved_func,value);

  } else {
    // we know what INT instruction it is
    string t_ebx = RoseBin_support::getTypeName(data_ebx);
    string t_ecx = RoseBin_support::getTypeName(data_ecx);
    string t_edx = RoseBin_support::getTypeName(data_edx);

    int_name += " ("+t_ebx+","+t_ecx+","+t_edx+")";
    //if (RoseBin_support::DEBUG_MODE())
    // cout << " found INT call : " << value << " .. " << int_name << endl;
    node->append_properties(SgGraph::dfa_variable,int_name);
  }

  return false;
}
Example #15
0
// Analyze the allocation type and location of this-pointers.
void ThisPtrUsage::analyze_alloc() {
  // Set the allocation type to unknown by default.
  alloc_type = AllocUnknown;

  // Use the type returned from get_memory_type() to determine our allocation type.  Perhaps in
  // the future thse can be fully combined, but this was what was required to support non-zero
  // ESP initialization.
  MemoryType type = this_ptr->get_memory_type();
  if (type == StackMemLocalVariable) {
    alloc_type = AllocLocalStack;
  }
  else if (type == StackMemParameter) {
    alloc_type = AllocParameter;
  }
  else if (type == UnknownMem) {
    // This code is really a function of get_memory_type() still being broken.
    // If we're not a constant address (global), skip this function.
    if (!this_ptr->is_number() || this_ptr->get_width() > 64) return;
    // This is a bit hackish, but also reject obviously invalid constants.
    size_t num = this_ptr->get_number();
    // Here's a place where we're having the age old debate about how to tell what is an
    // address with absolutely no context.  Cory still likes consistency.  Others have
    // suggested that we should be using the memory map despite all of it's flaws...
    // if (!global_descriptor_set->memory_in_image(num)) return;
    if (num < 0x10000 || num > 0x7FFFFFFF) return;
    // Otherwise, we look like a legit global address?
    alloc_type = AllocGlobal;
  }
  else {
    return;
  }

  // It's not actually clear why we're looking for the allocation instruction.  Perhaps we
  // should quit doing this and just use the tests above.  At least for now though, looking for
  // the common pattern allows us to detect some unusual situations.  Wes' previous logic also
  // filtered by requiring LEA instructions, so we're preserving that limit.

  // This code previously relied on the first-creator-of-read feature of modifiers, which we're
  // retiring.  Even though it's not clear that this code is required, I've updated it to use
  // latest definers in place of modifiers, only we haven't switch to just using the _latest_
  // definers yet so it needed some additional filtering to determine which definer to use.
  PDG* pdg = fd->get_pdg();
  // Shouldn't happen.
  if (!pdg) return;
  const DUAnalysis& du = pdg->get_usedef();

  // This is hackish too.  We need it for debugging so that we have some way of reporting which
  // instructions are involved in the confusion.
  SgAsmInstruction* first_insn = NULL;
  for (SgAsmInstruction *ginsn : this_ptr->get_defining_instructions()) {
    // For debugging so that we have an address.
    if (first_insn == NULL) first_insn = ginsn;

    SgAsmX86Instruction *insn = isSgAsmX86Instruction(ginsn);
    if (insn == NULL) continue;

    // Since definers isn't the "latest" definer just yet, filter our subsequent analysis to
    // the one that wrote the this-ptr value.  This is hackish and wrong because we shouldn't
    // have to filter.  But maybe once we can upgrade , this code can go away...
    auto writes = du.get_writes(insn);
    bool found_write = false;
    for (const AbstractAccess& aa : writes) {
      if (aa.value->get_expression()->isEquivalentTo(this_ptr->get_expression())) {
        found_write = true;
        break;
      }
    }
    // If this instruction didn't write the this-ptr, it's not the one that we're looking for.
    if (!found_write) continue;
    // If we're here, this should be the instruction that defined the this-pointer.

    // If we're a local variable and we've found an LEA instruction, that's probably the one
    // we're looking for.
    if (alloc_type == AllocLocalStack && insn->get_kind() == x86_lea) {
      alloc_insn = ginsn;
      GDEBUG << "Stack allocated object: " << *(this_ptr->get_expression()) << " at "
             << debug_instruction(alloc_insn) << LEND;
      return;
    }
    // For global variables, the typical cases are move or push instructions.
    else if (alloc_type == AllocGlobal &&
             (insn->get_kind() == x86_mov || insn->get_kind() == x86_push)) {
      alloc_insn = ginsn;
      GDEBUG << "Global static object: " << *(this_ptr->get_expression()) << " at "
             << debug_instruction(alloc_insn) << LEND;
      return;
    }
    // For passed parameters, we should probably be looking for the instruction that reads the
    // undefined this-pointer value.  This code was sufficient at the time...
    else if (alloc_type == AllocParameter) {
      GDEBUG << "Passed object: " << *(this_ptr->get_expression()) << LEND;
      return;
    }
  }

  if (first_insn == NULL) {
    GDEBUG << "No allocation instruction found for " << *(this_ptr->get_expression())
           << " alloc_type=" << Enum2Str(alloc_type) << LEND;
  }
  else {
    GDEBUG << "No allocation instruction found for " << *(this_ptr->get_expression())
           << " alloc_type=" << Enum2Str(alloc_type) << " at " << debug_instruction(first_insn) << LEND;
  }

  // Based on evaluation of the test suite, if we've reached this point, something's gone
  // wrong, and it's very unclear if we're really the allocation type we thought we were.
  // Perhaps it's better to be cautious and retract our allocation type claims.  We could also
  // choose to return the best guess of our type here, by removing this line.
  //alloc_type = AllocUnknown;
  return;
}
/**
 * Writes the GML description of one graph edge to `myfile`.
 *
 * Nothing is written unless both end points of the edge wrap an
 * SgAsmStatement.  The edge's source/target numbers are looked up in nodesMap
 * (0 when an end point is not registered there).  The line style and colour
 * depend on the kind of x86 control-transfer instruction at the source, on
 * whether the target is the last entry of the source's CFG out-edges, and on
 * the edge's SgGraph::type property.
 *
 * @param info              auxiliary CFG information passed to cfgBinOutEdges()
 * @param forward_analysis  enables the highlighting of call/jmp edges that do
 *                          not lead to the instruction's CFG destination
 * @param myfile            output stream receiving the GML text
 * @param edge              edge to print
 */
void RoseBin_GMLGraph::printEdges( VirtualBinCFG::AuxiliaryInformation* info, bool forward_analysis, std::ofstream& myfile, SgDirectedGraphEdge* edge) {
    // Graphics attributes used below; each one also closes the "edge [" record.
    const char* const dashedRed    = "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#FF0000\" ]  ]\n";
    const char* const dashedGreen  = "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#00FF00\" ]  ]\n";
    const char* const dashedBlue   = "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#3399FF\" ]  ]\n";
    const char* const dashedBlack  = "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#000000\" ]  ]\n";
    const char* const solidYellow  = "   graphics [ type \"line\" arrow \"last\" fill \"#FFFF00\" ]  ]\n";
    const char* const solidBlack   = "   graphics [ type \"line\" arrow \"last\" fill \"#000000\" ]  ]\n";

    SgGraphNode* source = isSgGraphNode(edge->get_from());
    SgGraphNode* target = isSgGraphNode(edge->get_to());
    ROSE_ASSERT(source);
    ROSE_ASSERT(target);

    // Fetch the edge label; a label longer than one character is kept only
    // when it starts with 'U'.
    string edgeLabel="";
    map < int , string> edgeProps = edge->get_properties();
    for (map < int , string>::iterator p = edgeProps.begin(); p != edgeProps.end(); ++p) {
      if (p->first == SgGraph::edgeLabel)
        edgeLabel = p->second;
      if (edgeLabel.length()>1 && edgeLabel[0]!='U')
        edgeLabel="";
    }

    // Both end points must be assembly statements, otherwise the edge is skipped.
    SgAsmStatement* srcStmt = isSgAsmStatement(source->get_SgNode());
    SgAsmStatement* dstStmt = isSgAsmStatement(target->get_SgNode());
    if (srcStmt==NULL || dstStmt==NULL)
      return;

    // Node numbers as registered in nodesMap; 0 when a statement is missing.
    map <SgAsmStatement*, int>::iterator srcEntry = nodesMap.find(srcStmt);
    map <SgAsmStatement*, int>::iterator dstEntry = nodesMap.find(dstStmt);
    int srcIndex = (srcEntry != nodesMap.end()) ? srcEntry->second : 0;
    int dstIndex = (dstEntry != nodesMap.end()) ? dstEntry->second : 0;

    string output = "edge [\n  label \"";
    output += edgeLabel;
    output += "\"\n source ";
    output += RoseBin_support::ToString(srcIndex);
    output += "\n   target ";
    output += RoseBin_support::ToString(dstIndex);
    output += "\n";

    // ------------------
    // Choose the line style from the control-transfer instruction at the source.
    SgAsmX86Instruction* srcInsn = isSgAsmX86Instruction(source->get_SgNode());
    string graphics = "";
    if (srcInsn && x86InstructionIsControlTransfer(srcInsn)) {
      // `dest` is the target of the last CFG out-edge of the source, if any.
      vector<VirtualBinCFG::CFGEdge> outEdges = srcInsn->cfgBinOutEdges(info);
      SgAsmX86Instruction* dest = isSgAsmX86Instruction(outEdges.empty() ? NULL : outEdges.back().target().getNode());

      SgAsmInstruction* nextNode = isSgAsmInstruction(target->get_SgNode());
      ROSE_ASSERT(nextNode);

      if (!dest) {
        // in case of a multiple return
        if (srcInsn->get_kind() == x86_ret)
          graphics += dashedBlue;
      } else if (dest==nextNode) {
        // The edge leads to the CFG destination: call/ret/jmp are drawn red,
        // every other control transfer green.
        if (srcInsn->get_kind() == x86_call || srcInsn->get_kind() == x86_ret ||
            srcInsn->get_kind() == x86_jmp)
          graphics += dashedRed;
        else
          graphics += dashedGreen;
      } else if (forward_analysis &&
                 (srcInsn->get_kind() == x86_call || srcInsn->get_kind() == x86_jmp)) {
        graphics += solidYellow;
      }
    }

    // Edges typed as "usage" always override the style chosen above.
    string edgeType = getProperty(SgGraph::type, edge);
    if (edgeType==RoseBin_support::ToString(SgGraph::usage))
      graphics = dashedBlack;

    // skip the function declaration edges for now
    // When internal edges are skipped, control transfers other than RET get an
    // explicit edge colour (and, for call/jmp, an edge type).
    if (skipInternalEdges) {
      if (srcInsn && x86InstructionIsControlTransfer(srcInsn) && srcInsn->get_kind() != x86_ret) {
        if (srcInsn->get_kind() == x86_call)
          output += "  Edge_Color_ FF0000  \n  Type_ \"[ 33554432 CALL_EDGE ]\" \n";
        else if (srcInsn->get_kind() == x86_jmp)
          output += "  Edge_Color_ 00FF00  \n  Type_ \"[ 33554432 FILECALL_EDGE ]\" \n";
        else
          output += "  Edge_Color_ 0000FF  \n   ";
      }
    }

    // Fall back to a plain black line when no special style was selected.
    if (graphics!="")
      output += graphics;
    else
      output += solidBlack;

    myfile << output;
}
Example #17
0
/**
 * Decides whether the edge sgNodeBefore -> sgNode is a valid CFG edge.
 *
 * The edge is rejected (false) when either node is NULL, or when all of the
 * following hold:
 *   - both nodes wrap assembly instructions that belong to a function (the
 *     function is looked up as the parent or grandparent of the instruction);
 *   - an instruction found at one of the 1..7 bytes preceding sgNode's address
 *     is exactly the instruction of sgNodeBefore;
 *   - sgNodeBefore is an x86 CALL or RET.
 * In every other case the edge is considered valid.
 *
 * @param sgNode        target node of the edge
 * @param sgNodeBefore  source node of the edge
 * @return              false for the rejected cases described above, else true
 */
bool
GraphAlgorithms::isValidCFGEdge(SgGraphNode* sgNode,
                                SgGraphNode* sgNodeBefore) {
  if (!sgNode || !sgNodeBefore)
    return false;
  //  bool isAUnconditionalControlTransfer = false;
  bool valid = true;
  bool isDirectedControlFlowEdge = false;

  // `inst` is NULL when the predecessor is not an x86 instruction, so the
  // call/ret tests are computed once with a null guard.
  SgAsmX86Instruction* inst = isSgAsmX86Instruction(sgNodeBefore->get_SgNode());
  const bool beforeIsCall = (inst != NULL) && (inst->get_kind() == x86_call);
  const bool beforeIsRet  = (inst != NULL) && (inst->get_kind() == x86_ret);

  SgAsmInstruction* instSgNode = isSgAsmInstruction(sgNode->get_SgNode());
  SgAsmInstruction* instSgNodeBefore = isSgAsmInstruction(sgNodeBefore->get_SgNode());
  if (instSgNode && instSgNodeBefore) {
    if (RoseBin_support::DEBUG_MODE())
      cout << " *** instSgNode && instSgNodeBefore " << endl;

    // The enclosing function is either the parent or the grandparent; the
    // grandparent is only consulted when a parent exists.
    SgAsmFunction* f1 = isSgAsmFunction(instSgNode->get_parent());
    SgAsmFunction* f2 = isSgAsmFunction(instSgNodeBefore->get_parent());
    if (f1==NULL && instSgNode->get_parent()!=NULL)
      f1 = isSgAsmFunction(instSgNode->get_parent()->get_parent());
    if (f2==NULL && instSgNodeBefore->get_parent()!=NULL)
      f2 = isSgAsmFunction(instSgNodeBefore->get_parent()->get_parent());

    if (f1 && f2) {
      // (tps - 05/23/08) : the semantics of the previous implementation is:
      // check the node before in the instruction set and check if it is the same as the previous node
      // todo: the following line must be changed... the size of the current node does not give you the last node!
      if (RoseBin_support::DEBUG_MODE())
        cout << " *** f1 && f2 " << endl;

      // Probe the addresses 1..7 bytes before the current instruction and stop
      // at the first one that holds an instruction.
      SgAsmInstruction* nodeBeforeInSet = NULL;
      int byte = 1;
      ROSE_ASSERT(info);
      while (nodeBeforeInSet==NULL && byte<8) {
        nodeBeforeInSet = info->getInstructionAtAddress(instSgNode->get_address() - byte);
        byte++;
      }
      if (RoseBin_support::DEBUG_MODE())
        cout << " *** nodeBeforeInSet = " << nodeBeforeInSet << "  instSgNodeBefore : " << instSgNodeBefore << "   byte : " << byte << endl;

      if (nodeBeforeInSet == instSgNodeBefore) {
        //if (!isAsmUnconditionalBranch(nodeBeforeInSet))
        if (RoseBin_support::DEBUG_MODE())
          cout << " isDirectedControlFlowEdge = true  --  isAsmUnconditionalBranch(nodeBeforeInSet) : " << isAsmUnconditionalBranch(nodeBeforeInSet) << endl;
        isDirectedControlFlowEdge = true;
      }
      if (RoseBin_support::DEBUG_MODE()) {
        cout << " *** f1 && f2 -- isDirectionalControlFlowEdge: " << isDirectedControlFlowEdge << endl;
        cout << " inst->get_kind() == x86_call : " << beforeIsCall << "     inst->get_kind() == x86_ret : " << beforeIsRet << endl;
      }
      // A CALL or RET directly followed in memory by the current instruction
      // does not form a valid edge to it.
      if ((beforeIsCall || beforeIsRet) && isDirectedControlFlowEdge)
        valid=false;
    }
  }
  /*
  if (RoseBin_support::DEBUG_MODE()) {
    cout << " ValidCFGEdge::: sgNode " << sgNode->get_name() <<
      "   sgNodeBefore " << sgNodeBefore->get_name() <<
      "   instSgNode << " << instSgNode <<
      "   instSgNodeBefore << " << instSgNodeBefore <<
      "   is Valid node ? " << RoseBin_support::resBool(valid) <<
      "   isControlFlowEdge " << RoseBin_support::resBool(isDirectedControlFlowEdge) << endl;
  }
  */

  return valid;
}
Example #18
0
    // The actual analysis, triggered when we reach the specified execution address...
    virtual bool operator()(bool enabled, const Args &args) try {
        using namespace rose::BinaryAnalysis::InstructionSemantics;

        static const char *name = "Analysis";
        using namespace rose::BinaryAnalysis::InsnSemanticsExpr;
        if (enabled && args.insn->get_address()==trigger_addr) {
            RTS_Message *trace = args.thread->tracing(TRACE_MISC);
            trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr);

            // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with
            // "--with-yices=/full/path/to/yices/installation".  If not, you'll get a failed assertion when ROSE tries to use
            // the solver.
            rose::BinaryAnalysis::YicesSolver smt_solver;
            smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE);
            //smt_solver.set_debug(stdout);

            // We deactive the simulator while we're doing this analysis.  If the simulator remains activated, then the SIGCHLD
            // that are generated from running the Yices executable will be sent to the specimen.  That probably wouldn't cause
            // problems for the specimen, but the messages are annoying.
            args.thread->get_process()->get_simulator()->deactivate();

            // Create the policy that holds the analysis state which is modified by each instruction.  Then plug the policy
            // into the X86InstructionSemantics to which we'll feed each instruction.
            SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver);
            X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>,
                                    SymbolicSemantics::ValueType> semantics(policy);

            // The top of the stack contains the (unknown) return address.  The value above that (in memory) is the address of
            // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a
            // concrete value).  The contents of the buffer are unknown.  Process memory is maintained by the policy we created
            // above, so none of these memory writes are actually affecting the specimen's state in the simulator.
            policy.writeRegister("esp", policy.number<32>(4000));
            SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4));
            SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4));
            policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_());   // ptr to buffer
            policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_());       // bytes in buffer
            policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr));            // branch to analysis address

#if 1
            {
                // This is a kludge.  If the first instruction is an indirect JMP then assume we're executing through a dynamic
                // linker thunk and execute the instruction concretely to advance the instruction pointer.
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(analysis_addr));
                if (x86_jmp==insn->get_kind()) {
                    PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> p;
                    X86InstructionSemantics<PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State,
                                                                             PartialSymbolicSemantics::ValueType>,
                                            PartialSymbolicSemantics::ValueType> sem(p);
                    MemoryMap p_map = args.thread->get_process()->get_memory();
                    BOOST_FOREACH (MemoryMap::Segment &segment, p_map.segments())
                        segment.buffer()->copyOnWrite(true);
                    p.set_map(&p_map); // won't be thread safe
                    sem.processInstruction(insn);
                    policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value()));
                    trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64,
                                name, analysis_addr, p.readRegister<32>("eip").known_value());
                }
            }
#endif

            // Run the analysis until we can't figure out what instruction is next.  If we set things up correctly, the
            // simulation will stop when we hit the RET instruction to return from this function.
            size_t nbranches = 0;
            std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver
            while (policy.readRegister<32>("eip").is_known()) {
                uint64_t va = policy.readRegister<32>("eip").known_value();
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va));
                assert(insn!=NULL);
                trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str());
                semantics.processInstruction(insn);
                if (policy.readRegister<32>("eip").is_known())
                    continue;
                
                bool complete;
                std::set<rose_addr_t> succs = insn->getSuccessors(&complete);
                if (complete && 2==succs.size()) {
                    if (nbranches>=take_branch.size()) {
                        std::ostringstream s; s<<policy.readRegister<32>("eip");
                        trace->mesg("%s: EIP = %s", name, s.str().c_str());
                        trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name);
                        throw this;
                    }

                    // Decide whether we should take the branch or not.
                    bool take = take_branch[nbranches++];
                    rose_addr_t target = 0;
                    for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) {
                        if ((take && *si!=insn->get_address()+insn->get_size()) ||
                            (!take && *si==insn->get_address()+insn->get_size()))
                            target = *si;
                    }
                    assert(target!=0);
                    trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target);

                    // Is this path feasible?  We don't really need to check it now; we could wait until the end.
                    TreeNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(),
                                                         LeafNode::create_integer(32, target));
                    constraints.push_back(c); // shouldn't really have to do this again if we could save some state
                    if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(constraints)) {
                        policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target));
                    } else {
                        trace->mesg("%s: chosen control flow path is not feasible (or unknown).", name);
                        break;
                    }
                }
            }

            // Show the value of the EAX register since this is where GCC puts the function's return value.  If we did things
            // right, the return value should depend only on the unknown bytes from the beginning of the buffer.
            SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax");
            std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables();
            {
                std::ostringstream s;
                s <<name <<": symbolic return value is " <<result <<"\n"
                  <<name <<": return value has " <<vars.size() <<" variables:";
                for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin();
                     vi!=vars.end(); ++vi)
                    s <<" " <<*vi;
                s <<"\n";
                if (!constraints.empty()) {
                    s <<name <<": path constraints:\n";
                    for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci)
                        s <<name <<":   " <<*ci <<"\n";
                }
                trace->mesg("%s", s.str().c_str());
            }

            // Now give values to those bytes and solve the equation for the result using an SMT solver.
            if (!result.is_known()) {
                trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                LeafNodePtr result_var = LeafNode::create_variable(32);
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var);
                exprs.push_back(expr);
                for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                    expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x'));
                    exprs.push_back(expr);
                }
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) {
                    LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode();
                    if (!result_value) {
                        trace->mesg("%s: evaluation result could not be determined. ERROR!", name);
                    } else if (!result_value->is_known()) {
                        trace->mesg("%s: evaluation result is not constant. ERROR!", name);
                    } else {
                        trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value());
                    }
                } else {
                    trace->mesg("%s: expression is not satisfiable. (or unknown)", name);
                }
            }

            // Now try going the other direction.  Set the return expression to a value and try to discover what two bytes
            // would satisfy the equation.
            if (!result.is_known()) {
                trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(),
                                                        LeafNode::create_integer(32, 0xff015e7c));
                exprs.push_back(expr);
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) {
                    for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                        LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode();
                        if (var_val && var_val->is_known())
                            trace->mesg("%s:   v%"PRIu64" = %"PRIu64" %c",
                                        name, (*vi)->get_name(), var_val->get_value(),
                                        isprint(var_val->get_value())?(char)var_val->get_value():' ');
                    }
                } else {
                    trace->mesg("%s:   expression is not satisfiable (or unknown).  No solutions.", name);
                }
            }

            // Reactivate the simulator in case we want to continue simulating.
            args.thread->get_process()->get_simulator()->activate();
            throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator
        }
        return enabled;
    } catch (const Analysis*) {
        args.thread->get_process()->get_simulator()->activate();
        throw;
    }