/**
 * Traversal callback: remembers the first SgBinaryComposite that is visited
 * and counts the visited x86 instructions per instruction kind.
 *
 * For every SgAsmx86Instruction the string returned by
 * rose::stringifyX86InstructionKind(kind, "x86_") is used as the key into
 * instMap and the associated counter is incremented. Nodes that are not x86
 * instructions only take part in the SgBinaryComposite bookkeeping.
 *
 * @param n AST node currently being visited.
 */
void
CompassAnalyses::BinPrintAsmInstruction::Traversal::
visit(SgNode* n)
   {
  // Only the first binary composite seen is recorded in 'file'.
     if (isSgBinaryComposite(n) && file==NULL)
       file = isSgBinaryComposite(n);

     SgAsmx86Instruction* binInst = isSgAsmx86Instruction(n);
     if (binInst==NULL) return;

     const string className = rose::stringifyX86InstructionKind(binInst->get_kind(), "x86_");

  // operator[] value-initializes a missing counter to 0, so a single lookup
  // handles both the first occurrence (0 -> 1) and every later one (n -> n+1).
     ++instMap[className];

   } //End of the visit function.
Beispiel #2
0
    /**
     * Instruction callback that traces x86 instructions through the symbolic semantics.
     *
     * Nothing happens while the callback is disabled. The first time an instruction whose address equals `when` is seen,
     * the callback becomes triggered and initialize_state() is called for the thread. From then on every x86 instruction
     * is logged, the policy's instruction pointer register is set to the instruction address, the instruction is handed
     * to `semantics`, and the memory cell count plus SMT solver statistics are reported before the statistics are reset.
     *
     * @param enabled Whether the callback is enabled; returned unchanged.
     * @param args    Callback arguments; `args.insn` and `args.thread` are used.
     * @return        The incoming value of `enabled`.
     */
    virtual bool operator()(bool enabled, const Args &args) /*overrides*/ {
        if (!enabled)
            return enabled;

        /* Arm the tracer once the trigger address is reached. */
        if (!triggered && args.insn->get_address()==when) {
            triggered = true;
            initialize_state(args.thread);
        }

        SgAsmx86Instruction *x86insn = isSgAsmx86Instruction(args.insn);
        if (!triggered || !x86insn)
            return enabled;

        RTS_Message *trace = args.thread->tracing(TRACE_MISC);
        trace->mesg("%s: %s", name, unparseInstructionWithAddress(x86insn).c_str());

        /* Process the instruction with the instruction pointer set to its own address. */
        policy.get_state().registers.ip = SymbolicSemantics::ValueType<32>(x86insn->get_address());
        semantics.processInstruction(x86insn);

        /* Report the statistics gathered for this instruction, then start counting afresh. */
        SMTSolver::Stats solverStats = yices.get_stats();
        trace->mesg("%s: mem-cell list size: %zu elements\n", name, policy.get_state().memory.cell_list.size());
        trace->mesg("%s: SMT stats: ncalls=%zu, input=%zu bytes, output=%zu bytes\n",
                    name, solverStats.ncalls, solverStats.input_size, solverStats.output_size);
        yices.reset_stats();

#if 0
        std::ostringstream ss; ss <<policy;
        trace->mesg("%s", ss.str().c_str());
#endif

        return enabled;
    }
Beispiel #3
0
/** Returns a string containing the specified operand.
 *
 *  The instruction owning the expression is found by walking up the parent chain starting at the expression itself; the
 *  walk must reach an x86 instruction (asserted). The work is delegated to the three-argument overload, whose last
 *  argument tells whether that instruction's kind is x86_lea. */
std::string unparseX86Expression(SgAsmExpression *expr, const AsmUnparser::LabelMap *labels) {
    SgAsmx86Instruction *owner = NULL;
    SgNode *cursor = expr;
    while (owner == NULL && cursor != NULL) {
        owner = isSgAsmx86Instruction(cursor);
        cursor = cursor->get_parent();
    }
    ROSE_ASSERT(owner!=NULL);

    const bool kindIsLea = (owner->get_kind()==x86_lea);
    return unparseX86Expression(expr, labels, kindIsLea);
}
Beispiel #4
0
/**************************************************************************
 * Main function. This function is run on each node that is being traversed
 * in the graph. For each node, we determine the successors and check
 * if those have been previously seen. If yes, a cycle may exist.
 *
 * A function node clears the set of visited nodes and is not examined any
 * further. For every other node, each successor that is already in the
 * visited set is passed to checkIfValidCycle(); confirmed cycles are
 * reported on std::cerr and through 'output', and recorded in cycleFound.
 * Successor pairs where either end is not an x86 instruction are skipped,
 * because the report needs the instruction kind and address of both ends.
 *
 * name     : not used by this function
 * node     : graph node being visited (must not be NULL)
 * previous : not used by this function
 * returns  : always false
 **************************************************************************/
bool 
CompassAnalyses::CycleDetection::Traversal::run(string& name, SgGraphNode* node,
                                                SgGraphNode* previous){
  // check known function calls and resolve variables
  ROSE_ASSERT(node);

  SgAsmFunction* func = isSgAsmFunction(node->get_SgNode());
  if (func) {
    // if the node is a function, we clear the visited nodes
    // this should speed up our search
    visited.clear();
    return false;
  }
  successors.clear();
  ROSE_ASSERT(vizzGraph);
  vizzGraph->getSuccessors(node, successors);

  // The current node is the same for every successor, so cast it once.
  SgAsmx86Instruction* nodeSg = isSgAsmx86Instruction(node->get_SgNode());

  vector<SgGraphNode*>::iterator succ = successors.begin();
  for (;succ!=successors.end();++succ) {
    SgGraphNode* next = *succ;
    // only successors that were visited before can close a cycle
    if (visited.find(next)==visited.end())
      continue;

    SgAsmx86Instruction* nextSg = isSgAsmx86Instruction(next->get_SgNode());
    // Both ends are dereferenced below; skip the pair instead of
    // dereferencing a NULL pointer when one is not an x86 instruction.
    if (nodeSg==NULL || nextSg==NULL)
      continue;

    // "<kind> (<addr>) and <kind> (<addr>)" shared by both messages
    std::string endpoints = stringifyX86InstructionKind(nodeSg->get_kind()) + " (";
    endpoints+=RoseBin_support::HexToString(nodeSg->get_address()) + ") and ";
    endpoints+=stringifyX86InstructionKind(nextSg->get_kind()) + " (";
    endpoints+=RoseBin_support::HexToString(nextSg->get_address()) + ")";

    if (debug) {
      std::string outputText = "Found possible cycle between  " + endpoints;
      std::cerr << outputText << std::endl;
      output->addOutput(new CheckerOutput(nodeSg, outputText));
    }

    bool validCycle = checkIfValidCycle(node,next);
    if (validCycle) {
      std::string outputText = "Found cycle between  " + endpoints;
      std::cerr << outputText << std::endl;
      output->addOutput(new CheckerOutput(nodeSg, outputText));
      cycleFound[node]=next;
    } else {
      if (debug)
        std::cerr << "This is not a cyclic node "  << std::endl;
    }
  }
  visited.insert(node);
  return false;
}
Beispiel #5
0
    /**
     * Instruction callback that counts instructions per x86 instruction kind.
     *
     * Every call increments `ninsns` and the histogram entry keyed by the string that
     * rose::stringifyX86InstructionKind(kind, "x86_") returns. The instruction must be an x86 instruction (asserted).
     * Counting happens regardless of `enabled`.
     *
     * @param enabled Passed through unchanged.
     * @param args    Callback arguments; only `args.insn` is used.
     * @return        The incoming value of `enabled`.
     */
    virtual bool operator()(bool enabled, const Args &args) {
        SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.insn);
        assert(insn);
        ninsns++;

        std::string kind = rose::stringifyX86InstructionKind(insn->get_kind(), "x86_");

        // Construct the element through the container's value_type. Spelling out the template arguments of
        // std::make_pair stops compiling from C++11 on, because its parameters became rvalue references there.
        std::pair<Histogram::iterator, bool> inserted = insns.insert(Histogram::value_type(kind, 1));
        if (!inserted.second)
            inserted.first->second++;           // kind already present: bump its counter

        return enabled;
    }
void
CompassAnalyses::BinaryInterruptAnalysis::Traversal::getValueForDefinition(std::vector<uint64_t>& vec,
                                                                std::vector<uint64_t>& positions,
                                                                uint64_t& fpos,
                                                                SgGraphNode* node,
                                                                std::pair<X86RegisterClass, int> reg ) {
  set <SgGraphNode*> defNodeSet = getDefFor(node, reg);
  if (RoseBin_support::DEBUG_MODE()) 
    cout << "    size of found NodeSet = " << defNodeSet.size() <<endl;
  set <SgGraphNode*>::const_iterator it = defNodeSet.begin();
  for (;it!=defNodeSet.end();++it) {
    SgGraphNode* defNode = *it;
    if (RoseBin_support::DEBUG_MODE() && defNode) 
      cout << "    investigating ... " << defNode->get_name() <<endl;
    ROSE_ASSERT(defNode);
    SgAsmx86Instruction* inst = isSgAsmx86Instruction(defNode->get_SgNode());
    ROSE_ASSERT(inst);
    positions.push_back(inst->get_address());
    // the right hand side of the instruction is either a use or a value
    bool memRef = false, regRef = false;
    std::pair<X86RegisterClass, int> regRight =
      check_isRegister(defNode, inst, true, memRef, regRef);

    if (RoseBin_support::DEBUG_MODE()) {
      string regName = unparseX86Register(RegisterDescriptor(reg.first, reg.second, 0, 64), NULL);
      string regNameRight = unparseX86Register(RegisterDescriptor(regRight.first, regRight.second, 0, 64), NULL);
      cout << " VarAnalysis: getValueForDef . " << regName << "  right hand : " << regNameRight <<endl;
    }
    if (!regRef) {
      // it is either a memref or a value
      if (!memRef) {
	// get value of right hand side instruction
	uint64_t val = getValueOfInstr(inst, true);
	vec.push_back(val);
	fpos = inst->get_address();
	if (RoseBin_support::DEBUG_MODE()) 
      	  cout << "    found  valueOfInst = " << RoseBin_support::ToString(val) <<endl;
      }
    } else {
      // it is a register reference. I.e we need to follow the usage edge to find the 
      // definition of that node
      SgGraphNode* usageNode = g_algo->getDefinitionForUsage(vizzGraph,defNode);
      if (usageNode && usageNode!=node) {
	if (RoseBin_support::DEBUG_MODE() && usageNode) 
      	  cout << "    following up usage for " << usageNode->get_name() <<endl;
	getValueForDefinition(vec, positions, fpos, usageNode, regRight);
      } else {
	// we look at the same node.
	cout << " ERROR :: Either following usage to itself or usageNode = NULL. " << usageNode << endl;
      }
    }
  }
}
Beispiel #7
0
/* Assembles all instructions of an interpretation.  ROSE allows blocks to be non-contiguous (i.e., an unconditional jump can
 * appear in the middle of a basic block).  However, we need to disassemble contiguous instructions.
 *
 * Each instruction gathered by InstructionCollector is assembled on its own at its original address. A failure is reported
 * on stderr and, unless the assembler is already in debug mode, the instruction is assembled once more with debugging
 * output sent to stderr; processing then continues with the next instruction. The number of successfully assembled
 * instructions is printed to stdout at the end. */
static void
assemble_all(SgAsmInterpretation *interp)
{
    size_t nassembled = 0;
    Assembler *assembler = Assembler::create(interp);
    ROSE_ASSERT(assembler!=NULL);
    InstructionCollector collector(interp);
    for (Disassembler::InstructionMap::iterator ii=collector.insns.begin(); ii!=collector.insns.end(); ++ii) {
        rose_addr_t original_va = ii->first;

        /* The new_va is the virtual address of the instruction now that we may have moved it to a new location in memory.
         * We're leaving this implementation for later. For now, just assume that instructions don't move in memory. */
        rose_addr_t new_va = original_va;

        SgAsmx86Instruction *insn = isSgAsmx86Instruction(ii->second);
        ROSE_ASSERT(insn!=NULL);
        SgUnsignedCharList machine_code;
        try {
            insn->set_address(new_va);
            machine_code = assembler->assembleOne(insn);
            ROSE_ASSERT(!machine_code.empty());
            ++nassembled;
        } catch (const Assembler::Exception &e) {
            std::cerr <<"assembly failed at " <<StringUtility::addrToString(e.insn->get_address())
                      <<": " <<e.what() <<std::endl;
            if (!assembler->get_debug()) {
                /* Assemble the failing instruction again with debugging turned on so the cause shows up on stderr. */
                assembler->set_debug(stderr);
                try {
                    assembler->assembleOne(insn);
                } catch (...) {
                    /* The failure has already been reported above. */
                }
                /* Turn debugging off again. NULL is used because the boolean literal 'false' is not a valid null pointer
                 * constant in C++11 and later. */
                assembler->set_debug(NULL);
            }
            /* Keep going with the next instruction. */
        }

#if 0   /* Don't worry about writing the instruction back out to the section. [RPM 2011-08-23] */
        /* We don't handle the case where an instruction grows because that could cause us to require that the section
         * containing the instruction grows, which opens a whole can of worms. */
        ROSE_ASSERT(machine_code.size() <= insn->get_size());
        
        /* We're using the same memory map as what was used when we loaded the binary and disassembled it. Therefore, the
         * machine code that we're writing back needs to fall within those same areas of the virtual address space: we cannot
         * write past the end of mapped memory, nor can we write to the space (if any) between mapped memory chunks. */
        size_t nwritten = interp->get_map()->write(&(machine_code[0]), new_va, machine_code.size(), MemoryMap::MM_PROT_NONE);
        ROSE_ASSERT(nwritten==machine_code.size());
#endif
    }

    std::cout <<"Assembled " <<nassembled <<" instruction" <<(1==nassembled?"":"s") <<"\n";
    delete assembler;
}
// see base class
//
// Only the last instruction of the sequence is examined: the result is true
// when it is an x86 instruction of kind x86_call or x86_farcall, in which
// case get_branch_target() is invoked on it with 'target'.
bool
SgAsmx86Instruction::is_function_call(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target)
{
    if (insns.empty())
        return false;

    SgAsmx86Instruction *tail = isSgAsmx86Instruction(insns.back());
    if (tail == NULL)
        return false;

    const bool isCall = (tail->get_kind()==x86_call || tail->get_kind()==x86_farcall);
    if (!isCall)
        return false;

    tail->get_branch_target(target);
    return true;
}
// Tells whether two graph nodes have the same parent function.
//
// Returns true right away when 'node' wraps a function. Otherwise both nodes
// must wrap x86 instructions, and the result is whether their parents, cast
// to SgAsmFunction, are the same pointer (two failed casts also compare
// equal). In every other case the result is false.
bool
RoseBin_DataFlowAbstract::sameParents(SgGraphNode* node, SgGraphNode* next) {
  if (isSgAsmFunction(node->get_SgNode()))
    return true;

  SgAsmx86Instruction* lhsInsn = isSgAsmx86Instruction(node->get_SgNode());
  SgAsmx86Instruction* rhsInsn = isSgAsmx86Instruction(next->get_SgNode());
  if (lhsInsn == NULL || rhsInsn == NULL)
    return false;

  SgAsmFunction* lhsFunc = isSgAsmFunction(lhsInsn->get_parent());
  SgAsmFunction* rhsFunc = isSgAsmFunction(rhsInsn->get_parent());
  return lhsFunc == rhsFunc;
}
Beispiel #10
0
 // Replace the comparator defined below?
 //
 // Ordering: a definition without a definer sorts before one that has a
 // definer; two definitions with definers are ordered by definer address;
 // every remaining tie is broken by 'access'.
 bool operator<(const Definition& other) const {
   const bool hasDefiner = (definer != NULL);
   const bool otherHasDefiner = (other.definer != NULL);

   // Exactly one side lacks a definer: that side is the smaller one.
   if (hasDefiner != otherHasDefiner)
     return otherHasDefiner;

   if (hasDefiner && definer->get_address() != other.definer->get_address())
     return definer->get_address() < other.definer->get_address();

   return (access < other.access);
 }
Beispiel #11
0
SgAsmx86Instruction* SageBuilderAsm::buildx86Instruction( X86InstructionKind kind )
   {
  // Builds a new x86 instruction of the requested kind. Everything except the
  // kind gets a placeholder value: address 0, an empty mnemonic and
  // x86_insnsize_none for the base, operand and address sizes.
     rose_addr_t        placeholderAddress = 0;
     string             emptyMnemonic      = "";
     X86InstructionSize noSize             = x86_insnsize_none;

     SgAsmx86Instruction* insn =
          new SgAsmx86Instruction(placeholderAddress,emptyMnemonic,kind,noSize,noSize,noSize);

  // The constructor is expected to leave the operand list unset.
     ROSE_ASSERT(insn->get_operandList() == NULL);

  // Every instruction must carry a valid SgAsmOperandList whose parent is
  // the instruction itself.
     SgAsmOperandList* operands = new SgAsmOperandList ();
     insn->set_operandList(operands);
     operands->set_parent(insn);

     return insn;
   }
// Determines the instruction that control flows to after 'instx'.
//
// The flow-out edge reported by cfgBinFlowOutEdge() is used when there is
// one. When there is none and no stop condition applies, the instruction
// that starts directly behind this one (address + number of raw bytes) is
// looked up in rememberInstructions and used instead, if it is known.
//
// instx            : instruction to resolve; anything but an x86 instruction yields NULL
// hasStopCondition : when true, no fall-through lookup is attempted
// returns          : the next instruction, or NULL when none was found
SgAsmInstruction*
RoseBin_FlowAnalysis::resolveFunction(SgAsmInstruction* instx, bool hasStopCondition) {
  SgAsmx86Instruction* inst = isSgAsmx86Instruction(instx);
  if (inst==NULL) return NULL;
  ROSE_ASSERT(g_algo->info);
  SgAsmInstruction* nextFlow = inst->cfgBinFlowOutEdge(g_algo->info);
  // if current node is not a controltransfer node (e.g. jmp, ret, ...),
  // then there should be a flow to a next node
  if (nextFlow==NULL &&
      hasStopCondition==false) {
    // in this case, we have a ordinary node that should be connected to the next block
    // now lets find the next block and create a function for these two blocks
    uint64_t addrInst = inst->get_address();
    uint64_t size = (inst->get_raw_bytes()).size();
    uint64_t nextAddr = addrInst+size;
    rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::const_iterator it2 =
      rememberInstructions.find(nextAddr);
    if (it2!=rememberInstructions.end()) {
      // found the next instruction
      nextFlow = isSgAsmInstruction(it2->second);
    }
  } else {
    // A separate x86_jmp branch used to follow here, but its condition was a
    // strict subset of the one above and therefore could never be taken.
    // NOTE(review): this warning is also printed when a flow-out edge was
    // found (nextFlow != NULL) -- confirm that this is intended.
    if (RoseBin_support::DEBUG_MODE())
      if (!(inst->get_kind() == x86_nop || inst->get_kind() == x86_ret))
        cerr << " WARNING: function resolution::  cant resolve :  " << inst->class_name() << "(" << unparseInstruction(inst) << ")" << endl;
  }

  return nextFlow;
}
Beispiel #13
0
// Renders the instructions in [beg, end) as an HTML fragment in normalized form.
//
// Each instruction contributes its lower-cased, HTML-escaped mnemonic in a red
// <font> element followed by a blue <font> element listing its operands. An
// operand is written as " R", " M" or " V" (register category, memory
// category, anything else) followed by the number that numberOperands()
// assigned to it. Every instruction ends with "; </font> <br> ".
std::string normalizeInstructionsToHTML(std::vector<SgAsmx86Instruction*>::iterator beg, 
    std::vector<SgAsmx86Instruction*>::iterator end)
{
    string html;

    // one numbering table per operand category, indexed by the category value
    map<SgAsmExpression*, size_t> valueNumbers[3];
    numberOperands( beg,end, valueNumbers);

    while (beg != end) {
      SgAsmx86Instruction* insn = *beg;

      string mne = insn->get_mnemonic();
      boost::to_lower(mne);
      html += "<font color=\"red\">" + htmlEscape(mne)+"</font>";

      const SgAsmExpressionPtrList& operands = getOperands(insn);

      html += "<font color=\"blue\">";
      for (size_t idx = 0; idx < operands.size(); ++idx) {
        SgAsmExpression* operand = operands[idx];
        const ExpressionCategory cat = getCategory(operand);

        const map<SgAsmExpression*, size_t>& numbering = valueNumbers[(int)cat];
        map<SgAsmExpression*, size_t>::const_iterator numIter = numbering.find(operand);
        assert (numIter != numbering.end());

        const char* tag = " V";
        if (cat == ec_reg)
          tag = " R";
        else if (cat == ec_mem)
          tag = " M";

        html += tag;
        html += boost::lexical_cast<string>(numIter->second);
      }
      html += "; </font> <br> ";

      ++beg;
    }

    return html;
}
Beispiel #14
0
// Sets up the BTOR problem state and collects address tables from the project.
//
// Builds the register maps, a 32-bit step counter that saturates at
// maxNumStepsToFindError + 1, the resetState condition (step counter == 0)
// and the errorsEnabled condition (step counter >= minNumStepsToFindError
// and, unless maxNumStepsToFindError is 0, <= maxNumStepsToFindError).
// It then records from the AST under 'proj':
//   functionStarts - address of every SgAsmFunction
//   blockStarts    - address of every non-empty SgAsmBlock whose first statement is an x86 instruction
//   returnPoints   - address following every x86_call instruction (address + raw byte count)
//   validIPs       - address of every x86 instruction
//
// hooks                  : stored in the 'hooks' member
// minNumStepsToFindError : lower bound of the step window in which errors are enabled (must be >= 1)
// maxNumStepsToFindError : upper bound of that window; 0 disables the upper bound
// proj                   : project whose AST is queried
BtorTranslationPolicy::BtorTranslationPolicy(BtorTranslationHooks* hooks, uint32_t minNumStepsToFindError, uint32_t maxNumStepsToFindError, SgProject* proj): problem(), hooks(hooks), regdict(NULL) {
  assert (minNumStepsToFindError >= 1); // Can't find an error on the first step
  assert (maxNumStepsToFindError < 0xFFFFFFFFU); // Prevent overflows
  assert (minNumStepsToFindError <= maxNumStepsToFindError || maxNumStepsToFindError == 0);
  makeRegMap(origRegisterMap, "");
  makeRegMapZero(newRegisterMap);
  isValidIp = false_();
  validIPs.clear();
  Comp stepCount = problem.build_var(32, "stepCount_saturating_at_" + boost::lexical_cast<std::string>(maxNumStepsToFindError + 1));
  addNext(stepCount, ite(problem.build_op_eq(stepCount, number<32>(maxNumStepsToFindError + 1)), number<32>(maxNumStepsToFindError + 1), problem.build_op_inc(stepCount)));
  resetState = problem.build_op_eq(stepCount, zero(32));
  errorsEnabled =
    problem.build_op_and(
      problem.build_op_ugte(stepCount, number<32>(minNumStepsToFindError)),
      (maxNumStepsToFindError == 0 ?
       true_() :
       problem.build_op_ulte(stepCount, number<32>(maxNumStepsToFindError))));
  {
    vector<SgNode*> functions = NodeQuery::querySubTree(proj, V_SgAsmFunction);
    for (size_t i = 0; i < functions.size(); ++i) {
      functionStarts.push_back(isSgAsmFunction(functions[i])->get_address());
    }
  }
  {
    vector<SgNode*> blocks = NodeQuery::querySubTree(proj, V_SgAsmBlock);
    for (size_t i = 0; i < blocks.size(); ++i) {
      SgAsmBlock* b = isSgAsmBlock(blocks[i]);
      if (!b->get_statementList().empty() && isSgAsmx86Instruction(b->get_statementList().front())) {
        blockStarts.push_back(b->get_address());
      }
    }
  }
  {
    // A single query over all x86 instructions feeds both tables. The two
    // tables are independent, so each keeps the order it had when they were
    // filled from two separate queries.
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmx86Instruction);
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmx86Instruction* b = isSgAsmx86Instruction(instructions[i]);
      validIPs.push_back(b->get_address());
      if (b->get_kind() == x86_call) {
        // a call returns to the instruction that directly follows it
        returnPoints.push_back(b->get_address() + b->get_raw_bytes().size());
      }
    }
  }
}
Beispiel #15
0
// Builds one signature vector per sliding window of instructions found under
// 'top' and adds each of them to the database, followed by the function
// statistics.
//
// Windows hold 'windowSize' consecutive instructions and start every 'stride'
// instructions; only complete windows are processed. For every window the
// vector counts instructions per kind, operands per category and kind,
// occurrences of each numbered operand, operands per category, and the
// category pair of the first two operands of each instruction.
//
// top          : root of the subtree that is searched for instructions
// filename     : passed on to the database helpers
// functionName : passed on to the database helpers
// functionId   : passed on to the database helpers
// windowSize   : number of instructions per window
// stride       : distance between window starts; must be greater than zero
// con          : database connection
// returns      : true when at least one window was added to the database
bool createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId, size_t windowSize, size_t stride, sqlite3_connection& con) { // Ignores function boundaries
  // A zero stride would never advance the window and would divide by zero
  // when the window index (windowStart/stride) is computed below.
  assert (stride > 0);

  bool retVal = false;
  vector<SgAsmx86Instruction*> insns;
  FindInstructionsVisitor vis;
  AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
  std::cout << "Number of instructions: " << insns.size() << std::endl;
  size_t insnCount = insns.size();

  for (size_t windowStart = 0;
       windowStart + windowSize <= insnCount;
       windowStart += stride) {
    // the vector is kept in static storage and cleared for every window
    static SignatureVector vec;
    vec.clear();
    hash_map<SgAsmExpression*, size_t> valueNumbers[3];
    numberOperands(&insns[windowStart], windowSize, valueNumbers);
    string normalizedUnparsedInstructions;
    // Unparse the normalized forms of the instructions
    for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
      SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
      size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      string mne = insn->get_mnemonic();
      boost::to_lower(mne);
      normalizedUnparsedInstructions += mne;
#endif
      const SgAsmExpressionPtrList& operands = getOperands(insn);
      size_t operandCount = operands.size();
      // Add to total for this variant
      ++vec.totalForVariant(var);
      // Add to total for each kind of operand
      for (size_t i = 0; i < operandCount; ++i) {
        SgAsmExpression* operand = operands[i];
        ExpressionCategory cat = getCategory(operand);
        ++vec.opsForVariant(cat, var);
        // Add to total for this unique operand number (for this window)
        hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
        assert (numIter != valueNumbers[(int)cat].end());
        size_t num = numIter->second;
        ++vec.specificOp(cat, num);
        // Add to total for this kind of operand
        ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
        normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") + boost::lexical_cast<string>(num);
#endif
      }

      // Disabled experiment: see what effect jumps have on the false positive rate
      //uint64_t addr =0;
      /*
      if( x86GetKnownBranchTarget(insn, addr) == true  )
      {
        uint64_t insn_addr = insn->get_address();
        if( addr < insn_addr )
          normalizedUnparsedInstructions += " UP ";
        else
          normalizedUnparsedInstructions += " DOWN ";
      }*/

      // Add to total for this pair of operand kinds
      if (operandCount >= 2) {
        ExpressionCategory cat1 = getCategory(operands[0]);
        ExpressionCategory cat2 = getCategory(operands[1]);
        ++vec.operandPair(cat1, cat2);
      }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      if (insnNumber + 1 < windowSize) {
        normalizedUnparsedInstructions += ";";
      }
#endif
    }

#if 0
    // Print out this vector
    cout << "{";
    for (size_t i = 0; i < SignatureVector::Size; ++i) {
      if (i != 0) cout << ", ";
      cout << vec[i];
    }
    cout << "}\n";
#endif

    // cout << "Normalized instruction stream: " << normalizedUnparsedInstructions << endl;

    // Add vector to database
    addVectorToDatabase(con, vec, functionName, functionId, windowStart/stride, normalizedUnparsedInstructions, &insns[windowStart], filename, windowSize, stride);
    retVal = true;
  }
  addFunctionStatistics(con, filename, functionName, functionId, insnCount);
  return retVal;
}
// Ignores function boundaries
//
// Builds one signature vector per sliding window of instructions found under 'top' and adds each of them to the database
// through the transaction 'tx', followed by the function statistics.
//
// Windows hold 'windowSize' consecutive instructions and start every 'stride' instructions; only complete windows are
// processed. For every window the vector counts instructions per kind, operands per category and kind, occurrences of
// each numbered operand, operands per category, and the category pair of the first two operands of each instruction.
//
// 'stride' must be greater than zero. 'filename', 'functionName' and 'functionId' are passed on to the database helpers.
// Returns true when at least one window was added to the database.
bool
createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId,
                                size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx)
{
    // A zero stride would never advance the window and would divide by zero when the window index
    // (windowStart/stride) is computed below.
    assert (stride > 0);

    bool retVal = false;
    vector<SgAsmx86Instruction*> insns;
    FindInstructionsVisitor vis;
    AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
    size_t insnCount = insns.size();

    for (size_t windowStart = 0; windowStart + windowSize <= insnCount; windowStart += stride) {
        // the vector is kept in static storage and cleared for every window
        static SignatureVector vec;
        vec.clear();
        hash_map<SgAsmExpression*, size_t> valueNumbers[3];
        numberOperands(&insns[windowStart], windowSize, valueNumbers);
        string normalizedUnparsedInstructions;
        // Unparse the normalized forms of the instructions
        for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
            SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
            size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            string mne = insn->get_mnemonic();
            boost::to_lower(mne);
            normalizedUnparsedInstructions += mne;
#endif
            const SgAsmExpressionPtrList& operands = getOperands(insn);
            size_t operandCount = operands.size();
            // Add to total for this variant
            ++vec.totalForVariant(var);
            // Add to total for each kind of operand
            for (size_t i = 0; i < operandCount; ++i) {
                SgAsmExpression* operand = operands[i];
                ExpressionCategory cat = getCategory(operand);
                ++vec.opsForVariant(cat, var);
                // Add to total for this unique operand number (for this window)
                hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
                assert (numIter != valueNumbers[(int)cat].end());
                size_t num = numIter->second;
                ++vec.specificOp(cat, num);
                // Add to total for this kind of operand
                ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
                normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") +
                                                  boost::lexical_cast<string>(num);
#endif
            }

            // Add to total for this pair of operand kinds
            if (operandCount >= 2) {
                ExpressionCategory cat1 = getCategory(operands[0]);
                ExpressionCategory cat2 = getCategory(operands[1]);
                ++vec.operandPair(cat1, cat2);
            }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            if (insnNumber + 1 < windowSize) {
                normalizedUnparsedInstructions += ";";
            }
#endif
        }

        // Add vector to database
        addVectorToDatabase(tx, vec, functionName, functionId, windowStart/stride, normalizedUnparsedInstructions,
                            &insns[windowStart], filename, windowSize, stride);
        retVal = true;
    }
    addFunctionStatistics(tx, filename, functionName, functionId, insnCount);
    return retVal;
}
Beispiel #17
0
// Builds the GML attribute text (label, node type, first-node marker, colour
// and graphics entry) for one graph node.
//
// The node's properties are read first: they provide the label and set the
// state flags (nodest_jmp, error, nodest_call, interrupt, checked,
// dfa_standard, dfa_resolved_func, dfa_unresolved_func). The fill colour is
// then chosen from the kind of the x86 instruction in 'internal' and
// overridden by the "checked" and data-flow flags, in that order.
//
// node             : graph node whose properties are read
// forward_analysis : not used by this function
// internal         : AST node behind the graph node; when it is not an x86
//                    instruction the colour of an ordinary instruction is used
// returns          : the GML text for the node
std::string
RoseBin_GMLGraph::getInternalNodes(  SgGraphNode* node,
                                     bool forward_analysis, SgAsmNode* internal) {

  SgAsmx86Instruction* control = isSgAsmx86Instruction(internal);
  // get the unparser string!
  string eval = "";
  string name="noname";
  string regs = "";

  // specifies that this node has no destination address
  nodest_jmp = false;
  // specifies that there is a node that has a call error (calling itself)
  error =false;
  // specifies a call to a unknown location
  nodest_call = false;
  // specifies where its an int instruction
  interrupt = false;
  // specifies whether a node has been visited (dfa)
  checked = false;

  dfa_standard = false;
  dfa_resolved_func =false;
  dfa_unresolved_func=false;
  string dfa_info="";
  string dfa_variable="";
  string visitedCounter="";

  // Translate the node's properties into the label and the state flags.
  // Several of the strings gathered here do not end up in the GML output.
  map < int , string> node_p = node->get_properties();
  map < int , string>::iterator prop = node_p.begin();
  for (; prop!=node_p.end(); ++prop) {
    int addr = prop->first;
    if (addr==SgGraph::name)
      name = prop->second;
    else if (addr==SgGraph::eval)
      eval = prop->second;
    else if (addr==SgGraph::regs)
      regs = prop->second;
    else if (addr==SgGraph::nodest_jmp)
      nodest_jmp = true;
    else if (addr==SgGraph::itself_call)
      error = true;
    else if (addr==SgGraph::nodest_call)
      nodest_call = true;
    else if (addr==SgGraph::interrupt)
      interrupt = true;
    else if (addr==SgGraph::done)
      checked = true;
    else if (addr==SgGraph::dfa_standard)
      dfa_standard = true;
    else if (addr==SgGraph::dfa_resolved_func) {
      dfa_resolved_func = true;
      dfa_info = prop->second;
    } else if (addr==SgGraph::dfa_unresolved_func) {
      dfa_unresolved_func = true;
      dfa_info = prop->second;
    } else if (addr==SgGraph::dfa_variable) {
      dfa_variable = prop->second;
    } else if (addr==SgGraph::visitedCounter) {
      visitedCounter = prop->second;
    } else {
      cerr << " *************** dotgraph: unknown property found :: " << addr << endl;
    }
  }

  // Pick node type and base colour from the instruction kind.
  string add = "";
  string typeNode = "";
  if (control == NULL) {
    // Not an x86 instruction: there is no kind to inspect, so use the colour
    // of an ordinary instruction instead of dereferencing a NULL pointer.
    add = " FFFF66 ";
  } else if (control->get_kind() == x86_call || control->get_kind() == x86_ret) {
    typeNode += " Type_ \"[ 67108864 FUNCTION_NODE ]\" \n";
    if (nodest_call)
      add = " FF9900 ";
    else if (error)
      add = " 3399FF ";
    else
      add = " FFCCFF ";
  } else if (control->get_kind() == x86_jmp) {
    typeNode += " Type_ \"[  67108864 FILE_NODE ]\" \n";
    if (nodest_jmp)
      add = " FF0000 ";
    else
      add = " 00FF00 ";
  } else if (x86InstructionIsControlTransfer(control)) {
    typeNode += " Type_ \"[  67108864 CLASS_NODE ]\" \n";
    if (control->get_kind() == x86_int)
      add = " 0000FF ";
    else
      add = " 008800 ";
  } else {
    add = " FFFF66 ";
  }

  // State flags override the kind-based colour; later checks win.
  if (checked)
    add = " 777777 ";

  if (dfa_standard)
    add = " FFFF00 ";
  if (dfa_resolved_func)
    add = " 00FF00 ";
  if (dfa_unresolved_func)
    add = " FF0000 ";

  string nodeStr = "";

  regs+=eval;
  // cant get the extra register info printed in gml format
  // because multiline is not supported? (tps 10/18/07)
  // (the label therefore consists of the name only)
  nodeStr= "   label \"" + name+"\"\n "+typeNode;
  int length = name.length();

  // The lookup of the preceding instruction (cfgBinFlowInEdge) is disabled,
  // so every node is written as a first node.
  nodeStr +="   first_ 1 \n";

  nodeStr += "  Node_Color_ " + add + "  \n";
  nodeStr += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"rectangle\" fill \"#" + add +  "\"  ]\n";

  return nodeStr;
}
Beispiel #18
0
/**
 * Write the GML "edge [ ... ]" record for one graph edge to myfile.
 *
 * Nothing is written when either end point of the edge does not wrap an
 * SgAsmStatement. Source/target ids are looked up in nodesMap; an end point
 * that is not registered there is emitted with id 0.
 *
 * @param info              auxiliary CFG information, forwarded to cfgBinOutEdges()
 * @param forward_analysis  when true, call/jmp edges that do not lead to the
 *                          instruction's last out-edge target are drawn yellow
 * @param myfile            output stream receiving the GML text
 * @param edge              the edge to print; both end points must be SgGraphNodes
 */
void RoseBin_GMLGraph::printEdges( VirtualBinCFG::AuxiliaryInformation* info, bool forward_analysis, std::ofstream& myfile, SgDirectedGraphEdge* edge) {
  SgGraphNode* source = isSgGraphNode(edge->get_from());
  SgGraphNode* target = isSgGraphNode(edge->get_to());
  ROSE_ASSERT(source);
  ROSE_ASSERT(target);

  // Fetch the edge label property. While scanning the properties, any label
  // longer than one character that does not start with 'U' is blanked again.
  string edgeLabel="";
  map < int , string> edge_p = edge->get_properties();
  for (map < int , string>::iterator prop = edge_p.begin(); prop!=edge_p.end(); ++prop) {
    if (prop->first==SgGraph::edgeLabel)
      edgeLabel = prop->second;
    if (edgeLabel.length()>1 && edgeLabel[0]!='U')
      edgeLabel="";
  }

  SgAsmStatement* binStat_s = isSgAsmStatement(source->get_SgNode());
  SgAsmStatement* binStat_t = isSgAsmStatement(target->get_SgNode());
  if (binStat_s==NULL || binStat_t==NULL)
    return;

  // Translate both statements into the node ids recorded in nodesMap
  // (0 when a statement is not registered).
  map <SgAsmStatement*, int>::iterator it_s = nodesMap.find(binStat_s);
  map <SgAsmStatement*, int>::iterator it_t = nodesMap.find(binStat_t);
  int pos_s=0;
  int pos_t=0;
  if (it_s!=nodesMap.end())
    pos_s = it_s->second;
  if (it_t!=nodesMap.end())
    pos_t = it_t->second;

  string output = "edge [\n  label \""+edgeLabel+"\"\n source " + RoseBin_support::ToString(pos_s) +
    "\n   target " + RoseBin_support::ToString(pos_t) + "\n";

  // ------------------
  // Choose the "graphics" line. It stays empty here unless the source is a
  // control-transfer instruction that matches one of the cases below.
  SgAsmx86Instruction* contrl = isSgAsmx86Instruction(source->get_SgNode());
  const bool isControlTransfer = contrl && x86InstructionIsControlTransfer(contrl);
  string add = "";
  if (isControlTransfer) {
    // Only the last out-edge of the source instruction is considered as its
    // destination.
    vector<VirtualBinCFG::CFGEdge> outEdges = contrl->cfgBinOutEdges(info);
    SgAsmx86Instruction* dest = isSgAsmx86Instruction(outEdges.empty() ? NULL : outEdges.back().target().getNode());

    SgAsmInstruction* nextNode = isSgAsmInstruction(target->get_SgNode());
    ROSE_ASSERT(nextNode);

    if (dest) {
      if (dest==nextNode) {
        // This edge leads to the resolved destination: dashed, red for
        // call/ret/jmp and green for every other control transfer.
        if (contrl->get_kind() == x86_call || contrl->get_kind() == x86_ret ||
            contrl->get_kind() == x86_jmp)
          add += "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#FF0000\" ]  ]\n";
        else
          add += "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#00FF00\" ]  ]\n";
      } else if (forward_analysis &&
                 (contrl->get_kind() == x86_call || contrl->get_kind() == x86_jmp)) {
        add += "   graphics [ type \"line\" arrow \"last\" fill \"#FFFF00\" ]  ]\n";
      }
    } else if (contrl->get_kind() == x86_ret ) {
      // in case of a multiple return
      add += "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#3399FF\" ]  ]\n";
    }
  }

  // Edges whose type property equals SgGraph::usage always get the black
  // dashed style, overriding whatever was chosen above.
  string type_n = getProperty(SgGraph::type, edge);
  if (type_n==RoseBin_support::ToString(SgGraph::usage)) {
    add = "   graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#000000\" ]  ]\n";
  }

  // With skipInternalEdges set, control transfers other than ret additionally
  // get an Edge_Color_ line (plus a Type_ line for call and jmp).
  if (skipInternalEdges && isControlTransfer && contrl->get_kind() != x86_ret) {
    if (contrl->get_kind() == x86_call)
      output += "  Edge_Color_ FF0000  \n  Type_ \"[ 33554432 CALL_EDGE ]\" \n";
    else if (contrl->get_kind() == x86_jmp)
      output += "  Edge_Color_ 00FF00  \n  Type_ \"[ 33554432 FILECALL_EDGE ]\" \n";
    else
      output += "  Edge_Color_ 0000FF  \n   ";
  }

  // Default style: plain black line.
  if (add=="")
    output += "   graphics [ type \"line\" arrow \"last\" fill \"#000000\" ]  ]\n";
  else
    output += add;

  myfile << output;
}
Beispiel #19
0
    // The actual analysis, triggered when we reach the specified execution address.
    //
    // When `enabled` is true and the current instruction's address equals `trigger_addr`, the function at
    // `analysis_addr` is evaluated symbolically (following the branch decisions in `take_branch`), the resulting EAX
    // expression is reported through the thread's TRACE_MISC facility, and `this` is thrown.  In every other case
    // the callback simply returns `enabled`.
    //
    // The simulator is deactivated for the duration of the analysis.  The catch handler at the bottom reactivates it
    // and rethrows whenever an `Analysis*` is thrown from the body.
    virtual bool operator()(bool enabled, const Args &args) try {
        using namespace BinaryAnalysis::InstructionSemantics;

        static const char *name = "Analysis";
        using namespace InsnSemanticsExpr;
        if (enabled && args.insn->get_address()==trigger_addr) {
            RTS_Message *trace = args.thread->tracing(TRACE_MISC);
            // Note the space between the string literals and the PRI* macros: since C++11 a literal immediately
            // followed by an identifier is parsed as a user-defined literal suffix.
            trace->mesg("%s triggered: analyzing function at 0x%08" PRIx64, name, analysis_addr);

            // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with
            // "--with-yices=/full/path/to/yices/installation".  If not, you'll get a failed assertion when ROSE tries to use
            // the solver.
            YicesSolver smt_solver;
            smt_solver.set_linkage(YicesSolver::LM_EXECUTABLE);
            //smt_solver.set_debug(stdout);

            // We deactivate the simulator while we're doing this analysis.  If the simulator remains activated, then the
            // SIGCHLD that are generated from running the Yices executable will be sent to the specimen.  That probably
            // wouldn't cause problems for the specimen, but the messages are annoying.
            args.thread->get_process()->get_simulator()->deactivate();

            // Create the policy that holds the analysis state which is modified by each instruction.  Then plug the policy
            // into the X86InstructionSemantics to which we'll feed each instruction.
            SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver);
            X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>,
                                    SymbolicSemantics::ValueType> semantics(policy);

            // The top of the stack contains the (unknown) return address.  The value above that (in memory) is the address of
            // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a
            // concrete value).  The contents of the buffer are unknown.  Process memory is maintained by the policy we created
            // above, so none of these memory writes are actually affecting the specimen's state in the simulator.
            policy.writeRegister("esp", policy.number<32>(4000));
            SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4));
            SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4));
            policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_());   // ptr to buffer
            policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_());       // bytes in buffer
            policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr));            // branch to analysis address

#if 1
            {
                // This is a kludge.  If the first instruction is a JMP then assume we're executing through a dynamic
                // linker thunk and execute the instruction concretely to advance the instruction pointer.  (Only the
                // instruction kind is tested here, not whether the jump is indirect.)
                SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(analysis_addr));
                // The cast yields NULL for anything that is not an x86 instruction; skip the kludge in that case
                // rather than dereferencing a null pointer.  The main loop below asserts on the same condition.
                if (insn!=NULL && x86_jmp==insn->get_kind()) {
                    VirtualMachineSemantics::Policy<VirtualMachineSemantics::State, VirtualMachineSemantics::ValueType> p;
                    X86InstructionSemantics<VirtualMachineSemantics::Policy<VirtualMachineSemantics::State,
                                                                            VirtualMachineSemantics::ValueType>,
                                            VirtualMachineSemantics::ValueType> sem(p);
                    p.set_map(args.thread->get_process()->get_memory()); // won't be thread safe
                    sem.processInstruction(insn);
                    policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value()));
                    trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08" PRIx64 " to 0x%08" PRIx64,
                                name, analysis_addr, p.readRegister<32>("eip").known_value());
                }
            }
#endif

            // Run the analysis until we can't figure out what instruction is next.  If we set things up correctly, the
            // simulation will stop when we hit the RET instruction to return from this function.
            size_t nbranches = 0;
            std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver
            while (policy.readRegister<32>("eip").is_known()) {
                uint64_t va = policy.readRegister<32>("eip").known_value();
                SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(va));
                assert(insn!=NULL);
                trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str());
                semantics.processInstruction(insn);
                if (policy.readRegister<32>("eip").is_known())
                    continue;

                // EIP is symbolic.  Only a two-way branch with a complete successor set can be resolved here; in any
                // other case EIP stays unknown and the loop condition ends the analysis.
                bool complete;
                std::set<rose_addr_t> succs = insn->get_successors(&complete);
                if (complete && 2==succs.size()) {
                    if (nbranches>=take_branch.size()) {
                        std::ostringstream s; s<<policy.readRegister<32>("eip");
                        trace->mesg("%s: EIP = %s", name, s.str().c_str());
                        trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name);
                        throw this;
                    }

                    // Decide whether we should take the branch or not.  "Taken" selects the successor that is not
                    // the fall-through address; "not taken" selects the fall-through address.
                    bool take = take_branch[nbranches++];
                    rose_addr_t target = 0;
                    for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) {
                        if ((take && *si!=insn->get_address()+insn->get_size()) ||
                            (!take && *si==insn->get_address()+insn->get_size()))
                            target = *si;
                    }
                    assert(target!=0);
                    trace->mesg("%s: branch %staken; target=0x%08" PRIx64, name, take?"":"not ", target);

                    // Is this path feasible?  We don't really need to check it now; we could wait until the end.
                    InternalNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(),
                                                             LeafNode::create_integer(32, target));
                    constraints.push_back(c); // shouldn't really have to do this again if we could save some state
                    if (smt_solver.satisfiable(constraints)) {
                        policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target));
                    } else {
                        trace->mesg("%s: chosen control flow path is not feasible.", name);
                        break;
                    }
                }
            }

            // Show the value of the EAX register since this is where GCC puts the function's return value.  If we did things
            // right, the return value should depend only on the unknown bytes from the beginning of the buffer.
            SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax");
            std::set<InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables();
            {
                std::ostringstream s;
                s <<name <<": symbolic return value is " <<result <<"\n"
                  <<name <<": return value has " <<vars.size() <<" variables:";
                for (std::set<InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi)
                    s <<" " <<*vi;
                s <<"\n";
                if (!constraints.empty()) {
                    s <<name <<": path constraints:\n";
                    for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci)
                        s <<name <<":   " <<*ci <<"\n";
                }
                trace->mesg("%s", s.str().c_str());
            }

            // Now give values to those bytes and solve the equation for the result using an SMT solver.
            if (!result.is_known()) {
                trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                LeafNodePtr result_var = LeafNode::create_variable(32);
                InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var);
                exprs.push_back(expr);
                for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                    expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x'));
                    exprs.push_back(expr);
                }
                if (smt_solver.satisfiable(exprs)) {
                    LeafNodePtr result_value = smt_solver.get_definition(result_var)->isLeafNode();
                    if (!result_value) {
                        trace->mesg("%s: evaluation result could not be determined. ERROR!", name);
                    } else if (!result_value->is_known()) {
                        trace->mesg("%s: evaluation result is not constant. ERROR!", name);
                    } else {
                        trace->mesg("%s: evaluation result is 0x%08" PRIx64, name, result_value->get_value());
                    }
                } else {
                    trace->mesg("%s: expression is not satisfiable.", name);
                }
            }

            // Now try going the other direction.  Set the return expression to a value and try to discover what two bytes
            // would satisfy the equation.
            if (!result.is_known()) {
                trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(),
                                                            LeafNode::create_integer(32, 0xff015e7c));
                exprs.push_back(expr);
                if (smt_solver.satisfiable(exprs)) {
                    for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                        LeafNodePtr var_val = smt_solver.get_definition(*vi)->isLeafNode();
                        if (var_val && var_val->is_known()) {
                            // isprint() is only defined for arguments representable as unsigned char (or EOF), so
                            // range-check the solver's value before classifying it.
                            uint64_t solved = var_val->get_value();
                            bool printable = solved<=0xff && isprint((int)solved);
                            trace->mesg("%s:   v%" PRIu64 " = %" PRIu64 " %c",
                                        name, (*vi)->get_name(), solved,
                                        printable?(char)solved:' ');
                        }
                    }
                } else {
                    trace->mesg("%s:   expression is not satisfiable.  No solutions.", name);
                }
            }

            // Reactivate the simulator in case we want to continue simulating.
            // NOTE(review): the throw below is caught by this function's own handler, which calls activate() a second
            // time.  Confirm that activate() tolerates being called twice after a single deactivate().
            args.thread->get_process()->get_simulator()->activate();
            throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator
        }
        return enabled;
    } catch (const Analysis*) {
        // Covers the early "out of take_branch values" exit, where the simulator is still deactivated.
        args.thread->get_process()->get_simulator()->activate();
        throw;
    }
/**
 * Interrupt detection: annotate an x86 INT instruction node with the name of
 * the interrupt call it performs.
 *
 * For a node wrapping an x86_int instruction the definitions reaching
 * ax/bx/cx/dx are collected via getValueForDefinition(). Then:
 *  - more than one candidate value for ax: the call is ambiguous; all
 *    candidates are listed in a "PROBLEM: ..." string stored under
 *    SgGraph::dfa_unresolved_func;
 *  - otherwise: the call name ("unknown " when ax has no value) followed by
 *    the type names of the three argument registers is stored under
 *    SgGraph::dfa_variable.
 * Nodes that are not x86 INT instructions are left untouched.
 *
 * @param name      label used in the debug output
 * @param node      graph node to inspect; must not be NULL
 * @param previous  not used by this analysis
 * @return always false
 */
bool 
CompassAnalyses::BinaryInterruptAnalysis::Traversal::run(string& name, SgGraphNode* node,
                                              SgGraphNode* previous){
  ROSE_ASSERT(node);

  SgAsmx86Instruction* asmNode = isSgAsmx86Instruction(node->get_SgNode());
  if (asmNode == NULL || asmNode->get_kind() != x86_int)
    return false;

  if (RoseBin_support::DEBUG_MODE())
    cout << "    " << name << " : found int call " << endl;

  std::vector<uint64_t> val_rax, val_rbx, val_rcx, val_rdx;
  std::vector<uint64_t> pos_rax, pos_rbx, pos_rcx, pos_rdx;
  // Every position starts at the 0xffffffff sentinel. The previous one-line
  // declaration initialized only fpos_rdx, so the other three could reach
  // getIntCallName() uninitialized.
  uint64_t fpos_rax = 0xffffffff;
  uint64_t fpos_rbx = 0xffffffff;
  uint64_t fpos_rcx = 0xffffffff;
  uint64_t fpos_rdx = 0xffffffff;

  // need to resolve rax, rbx, rcx, rdx
  // therefore get the definition for each
  getValueForDefinition(val_rax, pos_rax, fpos_rax, node, std::make_pair(x86_regclass_gpr, x86_gpr_ax));
  getValueForDefinition(val_rbx, pos_rbx, fpos_rbx, node, std::make_pair(x86_regclass_gpr, x86_gpr_bx));
  getValueForDefinition(val_rcx, pos_rcx, fpos_rcx, node, std::make_pair(x86_regclass_gpr, x86_gpr_cx));
  getValueForDefinition(val_rdx, pos_rdx, fpos_rdx, node, std::make_pair(x86_regclass_gpr, x86_gpr_dx));

  string int_name = "unknown ";

  DataTypes data_ebx = unknown;
  DataTypes data_ecx = unknown;
  DataTypes data_edx = unknown;

  if (val_rax.size()>1) {
    // Ambiguous: several values may reach ax. Report every candidate.
    string value = "";
    for (std::vector<uint64_t>::iterator it = val_rax.begin(); it!=val_rax.end(); ++it) {
      string i_name = getIntCallName(*it, data_ebx, data_ecx, data_edx,
                                     val_rbx, val_rcx, val_rdx,
                                     pos_rbx, pos_rcx, pos_rdx,
                                     fpos_rbx, fpos_rcx, fpos_rdx);
      value +="rAX:"+RoseBin_support::HexToString(*it)+" "+i_name+" ";
    }
    value = "PROBLEM: " + value;
    node->append_properties(SgGraph::dfa_unresolved_func,value);
  } else {
    // Zero or one candidate: resolve the name if a value is available.
    if (val_rax.size()==1) {
      int_name = getIntCallName(val_rax.front(), data_ebx, data_ecx, data_edx,
                                val_rbx, val_rcx, val_rdx,
                                pos_rbx, pos_rcx, pos_rdx,
                                fpos_rbx, fpos_rcx, fpos_rdx);
    }
    string t_ebx = RoseBin_support::getTypeName(data_ebx);
    string t_ecx = RoseBin_support::getTypeName(data_ecx);
    string t_edx = RoseBin_support::getTypeName(data_edx);

    int_name += " ("+t_ebx+","+t_ecx+","+t_edx+")";
    node->append_properties(SgGraph::dfa_variable,int_name);
  }
  return false;
}
/*
 * Detect functions (blocks) that can be merged together.
 *
 * Every SgAsmFunction below globalNode is visited, starting with the last
 * one returned by the sub-tree query. For each function, resolveFunction()
 * is asked for a follow-up instruction, given the function's last
 * instruction and whether the function contains a ret or hlt. If that
 * instruction belongs to another SgAsmFunction, the other function's
 * instructions and return targets are moved into the current function and
 * the other function is removed from globalNode.
 *
 * globalNode must be an SgAsmBlock whenever a merge actually happens.
 */
void
RoseBin_FlowAnalysis::resolveFunctions(SgAsmNode* globalNode) {
  vector<SgNode*> tree =NodeQuery::querySubTree(globalNode, V_SgAsmFunction);
  int nr=0;
  while (!tree.empty()) {
    SgAsmFunction* funcD = isSgAsmFunction(tree.back());
    tree.pop_back();
    // Check the cast before the progress message below dereferences funcD.
    ROSE_ASSERT(funcD);
    nr++;
    if ((nr % 100)==0 && RoseBin_support::DEBUG_MODE())
      cerr << " funcListSize : " << tree.size() << "  -- iteration : " << nr << "   func " << funcD->get_name() << endl;

    vector <SgNode*> funcVec =funcD->get_traversalSuccessorContainer();
    if (funcVec.empty())
      continue;

    // A function has a stop condition if any of its instructions is ret or hlt.
    // Every child is required to be an x86 instruction.
    bool hasStopCondition=false;
    for (unsigned int itf = 0; itf < funcVec.size() ; itf++) {
      SgAsmx86Instruction* finst = isSgAsmx86Instruction(funcVec[itf]);
      ROSE_ASSERT(finst);
      if (finst->get_kind() == x86_ret || finst->get_kind() == x86_hlt) {
        hasStopCondition=true;
      }
    }

    SgAsmx86Instruction* lastInst = isSgAsmx86Instruction(funcVec.back());
    ROSE_ASSERT(lastInst);
    SgAsmx86Instruction* nextInst = isSgAsmx86Instruction(resolveFunction(lastInst, hasStopCondition));
    if (nextInst == NULL)
      continue;
    SgAsmFunction* nextFunc = isSgAsmFunction(nextInst->get_parent());
    if (nextFunc == NULL)
      continue;

    // Merge nextFunc into funcD: first its return targets ...
    ROSE_ASSERT(g_algo->info);
    g_algo->info->returnTargets[funcD].insert(g_algo->info->returnTargets[nextFunc].begin(), g_algo->info->returnTargets[nextFunc].end());

    // ... then re-parent every instruction of nextFunc under funcD ...
    vector <SgNode*> funcNextVec =nextFunc->get_traversalSuccessorContainer();
    for (unsigned int i=0; i < funcNextVec.size(); ++i) {
      SgAsmInstruction* inst = isSgAsmInstruction(funcNextVec[i]);
      ROSE_ASSERT(inst);
      inst->set_parent(funcD);
      funcD->append_statement(inst);
    }

    // ... and finally detach the now empty nextFunc from the global block.
    // nextFunc itself is not deleted here.
    nextFunc->remove_children();
    nextFunc->set_parent(NULL);
    SgAsmBlock* globalBlock = isSgAsmBlock(globalNode);
    ROSE_ASSERT(globalBlock); // fail with a diagnostic instead of a null dereference
    globalBlock->remove_statement(nextFunc);
  } // while

}
Beispiel #22
0
/**
 * Traversal callback that looks for reads of memory locations that were not
 * written earlier in the same function.
 *
 * - On an SgAsmFunction node both address sets (memoryWrites, memoryRead)
 *   are cleared.
 * - On an x86 `mov` whose first operand is a memory reference and whose
 *   second operand is a register, value or memory reference, the evaluated
 *   destination address is recorded in memoryWrites.
 * - On an x86 `mov` from memory into a register, where the memory expression
 *   contains the bp register, the evaluated address is looked up in
 *   memoryWrites. If it is absent and has not been reported before, a
 *   "Possibly uninitialized variable" message is stored in result[inst] and
 *   the address is added to memoryRead.
 * All other nodes are ignored.
 */
void
InitPointerToNull::visit(SgNode* node) {
  // A new function starts: forget everything seen so far.
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
    return;
  }

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
  if (inst == NULL || inst->get_kind() != x86_mov)
    return;

  // The node expected directly below the function is the enclosing block
  // when a project is present; otherwise (IDA input) it is the instruction
  // itself.
  SgNode* belowFunction = inst;
  if (project)
    belowFunction = isSgAsmBlock(inst->get_parent());
  if (belowFunction == NULL)
    return;
  if (isSgAsmFunction(belowFunction->get_parent()) == NULL)
    return;

  // Classify the operands: the first one is the left-hand side, every later
  // one is treated as the right-hand side.
  SgAsmMemoryReferenceExpression* lhsMem = NULL;
  SgAsmRegisterReferenceExpression* lhsReg = NULL;
  SgAsmMemoryReferenceExpression* rhsMem = NULL;
  SgAsmRegisterReferenceExpression* rhsReg = NULL;
  SgAsmValueExpression* rhsVal = NULL;

  SgAsmExpressionPtrList& operands = inst->get_operandList()->get_operands();
  bool lhsSeen = false;
  for (SgAsmExpressionPtrList::iterator op = operands.begin(); op != operands.end(); ++op) {
    SgAsmExpression* expr = *op;
    ROSE_ASSERT(expr);
    if (!lhsSeen) {
      lhsMem = isSgAsmMemoryReferenceExpression(expr);
      lhsReg = isSgAsmRegisterReferenceExpression(expr);
      lhsSeen = true;
    } else {
      rhsMem = isSgAsmMemoryReferenceExpression(expr);
      rhsReg = isSgAsmRegisterReferenceExpression(expr);
      rhsVal = isSgAsmValueExpression(expr);
    }
  }

  // mov mem, (reg | value | mem): remember the written address.
  // The address is recorded regardless of which register the memory
  // expression is based on.
  if (lhsMem && (rhsReg || rhsVal || rhsMem)) {
    rose_addr_t writeAddr = BinQSupport::evaluateMemoryExpression(inst, lhsMem);
    if (debug)
      cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    memoryWrites.insert(writeAddr);
    return;
  }

  // Only mov reg, mem is of interest from here on.
  if (!(lhsReg && rhsMem))
    return;

  rose_addr_t readAddr = BinQSupport::evaluateMemoryExpression(inst, rhsMem);
  // Reads are only checked when the memory expression involves bp.
  if (!BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, rhsMem))
    return;

  if (memoryWrites.find(readAddr) != memoryWrites.end()) {
    // A matching write was seen earlier, nothing to report.
    if (debug)
      cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    return;
  }

  // Each address is reported only once.
  if (memoryRead.find(readAddr) != memoryRead.end())
    return;

  if (debug)
    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;

  // Name of the enclosing function, or empty if it cannot be determined.
  string funcname="";
  SgAsmBlock* enclosingBlock = isSgAsmBlock(inst->get_parent());
  if (enclosingBlock) {
    SgAsmFunction* enclosingFunc = isSgAsmFunction(enclosingBlock->get_parent());
    if (enclosingFunc)
      funcname = enclosingFunc->get_name();
  }

  string res = "Possibly uninitialized variable: ";
  res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
    " <"+inst->get_comment()+">  in function: "+funcname;
  result[inst]= res;
  memoryRead.insert(readAddr);
}
/**
 * Record control-flow edges for every instruction in rememberInstructions.
 *
 * Pass 1: for each instruction with a resolvable target, the instruction's
 * address is added to the target's incoming edges. For calls, the caller's
 * comment is additionally set to the callee function's name (if the comment
 * is empty), and the instruction following the call is appended to the
 * callee function's destination list.
 *
 * Pass 2: for each ret instruction, every destination recorded on its
 * enclosing function receives the ret's address as an incoming edge and the
 * ret itself as a source.
 *
 * All entries of rememberInstructions must be x86 instructions.
 */
void
RoseBin_FlowAnalysis::process_jumps() {
  if (RoseBin_support::DEBUG_MODE())
    cerr << "\n >>>>>>>>> processing jumps ... " << endl;

  rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it;
  for (it=rememberInstructions.begin();it!=rememberInstructions.end();++it) {
    SgAsmx86Instruction* inst = isSgAsmx86Instruction(it->second);
    // The cast yields NULL for non-x86 entries. Assert here (as the RET pass
    // below does) instead of dereferencing a null pointer.
    ROSE_ASSERT(inst);
    const bool isCall = (inst->get_kind() == x86_call);

    SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst));
    if (target == NULL) {
      // Only unresolved calls are reported; other instructions may simply
      // have no target.
      if (isCall && RoseBin_support::DEBUG_MODE())
        cerr << "    WARNING :: process_jumps: No target found for node " << RoseBin_support::HexToString(inst->get_address())
             << "   " << inst->get_mnemonic() << endl;
      continue;
    }

    // we set the sources (for each node)
    ROSE_ASSERT(g_algo->info);
    g_algo->info->incomingEdges[target].insert(inst->get_address());
    if (!isCall)
      continue;

    ROSE_ASSERT (target->get_parent());
    if (!target->get_parent()) {
      if (RoseBin_support::DEBUG_MODE())
        cerr << "   WARNING :: process_jumps: target has no parent ... i.e. no FunctionDeclaration to it " <<
          target->class_name() << endl;
      continue;
    }

    // The node directly below the function is the target itself when `db`
    // is set and the target's parent otherwise.
    SgAsmNode* b_b = target;
    if (!db)
      b_b = isSgAsmNode(target->get_parent());
    ROSE_ASSERT(b_b);
    SgAsmFunction* b_func = isSgAsmFunction(b_b->get_parent());

    // (16/Oct/07) tps: this is tricky, it appears that sometimes the target can
    // be just a jmp to a new location, so we should forward this information to the correct
    // function.
    // NOTE(review): the lookup below passes `inst`, so it yields the same
    // target as above and forwards nothing; process_jumps_get_target(target)
    // looks like what was intended. Left unchanged pending confirmation.
    if (b_func && target->get_kind() == x86_jmp &&
        b_func->get_numberOfTraversalSuccessors()==1) {
      SgAsmx86Instruction* target2 = isSgAsmx86Instruction(process_jumps_get_target(inst));
      if (target2) {
        b_b = target2;
        if (!db)
          b_b = isSgAsmNode(target2->get_parent());
        ROSE_ASSERT(b_b);
        b_func = isSgAsmFunction(b_b->get_parent());
      }
    }

    if (b_func == NULL) {
      if (RoseBin_support::DEBUG_MODE())
        cerr << " NO FUNCTION DETECTED ABOVE BLOCK . " << endl;
      continue;
    }

    if (inst->get_parent()) {
      if (inst->get_comment()=="")
        inst->set_comment(""+b_func->get_name());
      ROSE_ASSERT(g_algo->info);
      // The instruction right behind the call is where the callee returns to.
      SgAsmInstruction* inst_after = g_algo->info->getInstructionAtAddress(inst->get_address() + inst->get_raw_bytes().size());
      if (inst_after)
        b_func->append_dest(isSgAsmStatement(inst_after));
    }
  }

  // Second pass: connect every ret to the destinations collected above.
  rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it2;
  for (it2=rememberInstructions.begin();it2!=rememberInstructions.end();++it2) {
    SgAsmx86Instruction* target = isSgAsmx86Instruction(it2->second);
    ROSE_ASSERT (target);
    if (target->get_kind() != x86_ret)
      continue;

    SgAsmNode* b_b = target;
    if (!db)
      b_b = isSgAsmNode(target->get_parent());
    // b_b is NULL when the ret has no parent; treat that like "no function".
    SgAsmFunction* parent = b_b ? isSgAsmFunction(b_b->get_parent()) : NULL;
    if (parent == NULL) {
      if (RoseBin_support::DEBUG_MODE())
        cerr << "   ERROR :: RET jumps :: no parent found for ret : " << target->class_name() << endl;
      continue;
    }

    std::vector <SgAsmStatement*> dest_list = parent->get_dest();
    for (size_t i = 0; i < dest_list.size(); ++i) {
      ROSE_ASSERT (isSgAsmInstruction(dest_list[i]));
      ROSE_ASSERT(g_algo->info);
      g_algo->info->incomingEdges[isSgAsmInstruction(dest_list[i])].insert(target->get_address());
    }

    for (std::vector <SgAsmStatement*>::iterator it3 = dest_list.begin(); it3!=dest_list.end(); ++it3) {
      SgAsmInstruction* dest = isSgAsmInstruction(*it3);
      if (dest)
        dest->append_sources(target);
    }
  }
  if (RoseBin_support::DEBUG_MODE())
    cerr << " >>>>>>>>> resolving RET jumps ... done." << endl;
}
/**
 * Walk backwards through the graph from `node`, trying to resolve the value
 * held in a register.
 *
 * The walk follows getPredecessor() one node at a time and stops as soon as it
 * reaches an instruction that RoseBin_support classifies as conditional
 * (either on a flag or on one of its operands); in that case `cantTrack` is
 * set to true.
 *
 * @param node              Graph node to start from; its SgNode must be an
 *                          SgAsmx86Instruction (asserted).
 * @param codeSearch        Register (class, number) being searched for. Only
 *                          used for the debug trace here; the recursion passes
 *                          on the register taken from `refExpr_rightHand`.
 * @param cantTrack         Set to true when the walk hits a conditional
 *                          instruction. Never reset to false by this function.
 * @param refExpr_rightHand Register reference whose descriptor supplies the
 *                          (major, minor) pair handed to the recursive call.
 * @return The value produced by the recursive walk, or the initial placeholder
 *         0xffffffff when nothing was resolved.
 */
int64_t
RoseBin_DataFlowAbstract::trackValueForRegister(
                                                SgGraphNode* node,
                                                std::pair<X86RegisterClass, int>  codeSearch,
                                                bool& cantTrack,
                                                SgAsmx86RegisterReferenceExpression* refExpr_rightHand) {
  int64_t value = 0xffffffff;
  if (RoseBin_support::DEBUG_MODE())
    cout << "    ........ trying to resolve value for register :: " << codeSearch.first << "." << codeSearch.second << endl;

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node->get_SgNode());
  ROSE_ASSERT(inst);
  std::pair<X86RegisterClass, int>  code = std::make_pair((X86RegisterClass)refExpr_rightHand->get_descriptor().get_major(),
                                                          refExpr_rightHand->get_descriptor().get_minor());

  const bool condInst = RoseBin_support::isConditionalInstruction(inst);
  const bool condInstFlag = RoseBin_support::isConditionalFlagInstruction(inst);

  if (condInstFlag || condInst) {
    // The instruction depends either on a flag or on the value of one of its
    // operands (e.g. cmovz eax, esi is only performed if esi=0). Resolving that
    // would require tracking the condition as well, so give up here.
    if (RoseBin_support::DEBUG_MODE()) {
      const char* dependsOn = condInstFlag ? "FLAGS" : "CONDITION";
      cout << " ERROR ------------------------------------------ " << endl;
      // Pass the address straight through: storing it in an int first would
      // truncate (and possibly sign-extend) a 64-bit address.
      cout << RoseBin_support::HexToString(inst->get_address()) << "  " << inst->class_name() <<
        " -- CANT resolve the value of the register because it depends on " << dependsOn << " -- code " <<
        code.first << "." << code.second << endl;
    }
    cantTrack = true;
    return value;
  }

  // Unconditional instruction: the easiest case, keep walking up the graph.
  // NOTE(review): getPredecessor() is assumed to return NULL when there is no
  // predecessor; in that case the placeholder value is returned unchanged
  // instead of dereferencing a null node in the recursive call.
  SgGraphNode* previous = getPredecessor(node);
  if (previous != NULL)
    value = trackValueForRegister(previous, code, cantTrack, refExpr_rightHand);

  return value;
}
Beispiel #25
0
 /* Traversal callback: records the visited node, keyed by address, when it has
  * an enclosing SgAsmFunction that is not flagged FUNC_LEFTOVERS.
  * NOTE(review): for nodes that are not x86 instructions the cast yields NULL;
  * this relies on getEnclosingNode() returning NULL for a NULL argument. */
 void visit(SgNode *node) {
     SgAsmx86Instruction *x86Insn = isSgAsmx86Instruction(node);
     SgAsmFunction *owner = SageInterface::getEnclosingNode<SgAsmFunction>(x86Insn);
     if (owner == NULL)
         return;

     // Skip instructions that live in a function built from left-over blocks.
     if ((owner->get_reason() & SgAsmFunction::FUNC_LEFTOVERS) != 0)
         return;

     insert(std::make_pair(x86Insn->get_address(), x86Insn));
 }
Beispiel #26
0
int main(int argc, char** argv) {

  std::string binaryFilename = (argc >= 1 ? argv[argc-1]   : "" );
  std::vector<std::string> newArgv(argv,argv+argc);
  newArgv.push_back("-rose:output");
  newArgv.push_back(binaryFilename+"-binarySemantics.C");

  SgProject* proj = frontend(newArgv);
  
  ROSE_ASSERT (proj);
  SgSourceFile* newFile = isSgSourceFile(proj->get_fileList().front());
  ROSE_ASSERT(newFile != NULL);
  SgGlobal* g = newFile->get_globalScope();
  ROSE_ASSERT (g);

  //I am doing some experimental work to enable functions in the C representation
  //Set this flag to true in order to enable that work
  bool enable_functions = true;
  //Jeremiah did some work to enable a simplification and normalization of the 
  //C representation. Enable this work by setting this flag to true.
  bool enable_normalizations = false;

  vector<SgNode*> asmFiles = NodeQuery::querySubTree(proj, V_SgAsmGenericFile);
  ROSE_ASSERT (asmFiles.size() == 1);



  if( enable_functions == false)
  {
    //Representation of C normalizations withotu functions
    SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration("run", SgTypeVoid::createType(), buildFunctionParameterList(), g);
    appendStatement(decl, g);
    SgBasicBlock* body = decl->get_definition()->get_body();
    //  ROSE_ASSERT(isSgAsmFile(asmFiles[0]));
    //  X86CTranslationPolicy policy(newFile, isSgAsmFile(asmFiles[0]));
    X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0]));
    ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL);

    policy.switchBody = buildBasicBlock();
    removeDeadStores(policy.switchBody,policy);

    SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody);
    ROSE_ASSERT(isSgBasicBlock(sw->get_body()));

    SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw);

    appendStatement(whileStmt, body);
    policy.whileBody = sw;

    X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy);
    //AS FIXME: This query gets noting in the form in the repository. Doing this hack since we only 
    //have one binary file anyways.
    //vector<SgNode*> instructions = NodeQuery::querySubTree(asmFiles[0], V_SgAsmx86Instruction);
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmx86Instruction);

    std::cout << "Instruction\n";
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmx86Instruction* insn = isSgAsmx86Instruction(instructions[i]);
      ROSE_ASSERT (insn);
      try {
          t.processInstruction(insn);
      } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) {
          std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n";
      }
    }


    if ( enable_normalizations == true )
    {
      //Enable normalizations of C representation
      //This is done heuristically where some steps
      //are repeated. It is not clear which order is 
      //the best
      {
        plugInAllConstVarDefs(policy.switchBody,policy) ;
        simplifyAllExpressions(policy.switchBody);
        removeIfConstants(policy.switchBody);
        removeDeadStores(policy.switchBody,policy);
        removeUnusedVariables(policy.switchBody);
      }
      {
        plugInAllConstVarDefs(policy.switchBody,policy) ;
        simplifyAllExpressions(policy.switchBody);
        removeIfConstants(policy.switchBody);
        removeDeadStores(policy.switchBody,policy);
      }
      removeUnusedVariables(policy.switchBody);
    }

  
  }else{ //Experimental changes to introduce functions into the C representation


    //When trying to add function I get that symbols are not defined

    //Iterate over the functions separately
    vector<SgNode*> asmFunctions = NodeQuery::querySubTree(proj, V_SgAsmFunction);

    for(size_t j = 0; j < asmFunctions.size(); j++ )
    {
      SgAsmFunction* binFunc = isSgAsmFunction( asmFunctions[j] );

      // Some functions (probably just one) are generated to hold basic blocks that could not
      // be assigned to a particular function. This happens when the Disassembler is overzealous
      // and the Partitioner cannot statically determine where the block belongs.  The name of
      // one such function is "***uncategorized blocks***".  [matzke 2010-06-29]
      if ((binFunc->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
        continue;

      //Some functions may be unnamed so we need to generate a name for those
      std::string funcName;
      if (binFunc->get_name().size()==0) {
	char addr_str[64];
	sprintf(addr_str, "0x%"PRIx64, binFunc->get_statementList()[0]->get_address());
	funcName = std::string("my_") + addr_str;;
      } else {
	funcName = "my" + binFunc->get_name();
      }

      //Functions can have illegal characters in their name. Need to replace those characters
      for ( int i = 0 ; i < funcName.size(); i++ )
      {
	char& currentCharacter = funcName.at(i);
	if ( currentCharacter == '.' )
	  currentCharacter = '_';
      }


      SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration(funcName, SgTypeVoid::createType(), buildFunctionParameterList(), g);

      appendStatement(decl, g);
      SgBasicBlock* body = decl->get_definition()->get_body();
      X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0]));
      ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL);
      policy.switchBody = buildBasicBlock();
      SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody);
      SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw);
      appendStatement(whileStmt, body);
      policy.whileBody = sw;
      X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy);
      vector<SgNode*> instructions = NodeQuery::querySubTree(binFunc, V_SgAsmx86Instruction);

      for (size_t i = 0; i < instructions.size(); ++i) {
        SgAsmx86Instruction* insn = isSgAsmx86Instruction(instructions[i]);
	if( insn->get_kind() == x86_nop )
	  continue;
        ROSE_ASSERT (insn);
        try {
            t.processInstruction(insn);
        } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) {
            std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n";
        }
      }

    }

    //addDirectJumpsToSwitchCases(policy);


  }

  proj->get_fileList().erase(proj->get_fileList().end() - 1); // Remove binary file before calling backend

//  AstTests::runAllTests(proj);

  //Compile the resulting project

  return backend(proj);
}
Beispiel #27
0
SgAsmInstruction*
SageBuilderAsm::buildMultibyteNopInstruction(int n)
   {
  // DQ (5/1/2010): Support for building multi-byte NOP instructions.
  // This is x86 specific, which is OK for now.
  //
  // Builds an SgAsmx86Instruction with mnemonic "nop" at address 0 whose raw
  // bytes are (n - 1) copies of 0x66 followed by a single 0x90, and attaches
  // an empty operand list to it.  n must be in the range 1..9 (asserted).

     ROSE_ASSERT(n > 0);
     ROSE_ASSERT(n <= 9);

  // Fixed attributes of the instruction being built.
     const uint64_t startVa                   = 0; /* Virtual address for start of instruction */
     const std::string opName                 = "nop";
     const X86InstructionKind opKind          = x86_nop;
     const X86InstructionSize baseSize        = x86_insnsize_32; /* Default size of instructions, based on architecture; see init() */
     const X86InstructionSize operandSize     = x86_insnsize_32;
     const X86InstructionSize addressSize     = x86_insnsize_32;

     const bool hasLock                       = false;
     const X86RepeatPrefix repeat             = x86_repeat_none;
     const bool usePrediction                 = false;
     const X86BranchPrediction prediction     = x86_branch_prediction_none;

     SgAsmx86Instruction *nop = new SgAsmx86Instruction(startVa, opName, opKind, baseSize, operandSize, addressSize);
     ROSE_ASSERT(nop != NULL);

  // Here we are building a simpler version of multi-byte nop using repeated prefixes:
  // one prefix byte for every byte beyond the first, then the opcode byte.
     SgUnsignedCharList encoding;
     for (int pad = n; pad > 1; --pad)
        {
          encoding.push_back(0x66);
        }
     encoding.push_back(0x90);

#if 0
  // This switch will implement proper multi-byte NOPs (not implemented yet).
     switch(n)
        {
          case 1:
             {
            // instruction->set_raw_bytes(SgUnsignedCharList(&(insnbuf[0]), &(insnbuf[0])+insnbufat));
               insnbuf.push_front(0x66);
               break;
             }

       // case 2: instruction = makeInstruction(x86_nop, "nop", modrm); break;

          default:
             {
               printf ("Error: SageBuilderAsm::buildMultibyteNopInstruction(n=%d) not supported \n",n);
               ROSE_ASSERT(false);
             }
        }
#endif

  // Copy exactly n bytes of the buffer into the instruction.
     const size_t byteCount = size_t(n);
     unsigned char *firstByte = &(encoding[0]);
     nop->set_raw_bytes(SgUnsignedCharList(firstByte, firstByte + byteCount));

     nop->set_lockPrefix(hasLock);
     nop->set_repeatPrefix(repeat);

     if (usePrediction)
          nop->set_branchPrediction(prediction);

  // The instruction takes no operands, but still needs an operand list that it is the parent of.
     SgAsmOperandList *noOperands = new SgAsmOperandList();
     nop->set_operandList(noOperands);
     noOperands->set_parent(nop);

     return nop;
   }
Beispiel #28
0
/* Analyze a single interpretation, one basic block at a time.
 *
 * All x86 instructions under `interp` (except those in left-over-block functions) are collected into an address-ordered
 * map.  The lowest-addressed remaining instruction starts a new block; instructions are then processed one after another,
 * following the instruction pointer computed by the semantics, and removed from the map as they are consumed.  The
 * resulting machine state is printed to std::cout after each instruction.
 *
 * The semantic API (SEMANTIC_API) and value domain (SEMANTIC_DOMAIN) are chosen at compile time; each #if branch below
 * is the implementation for one such configuration. */
static void
analyze_interp(SgAsmInterpretation *interp)
{
    /* Get the set of all instructions except instructions that are part of left-over blocks. */
    struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmx86Instruction*> {
        /* NOTE(review): `insn` is NULL for nodes that are not x86 instructions; this relies on getEnclosingNode()
         * returning NULL for a NULL argument so that `insn` is never dereferenced in that case -- confirm. */
        void visit(SgNode *node) {
            SgAsmx86Instruction *insn = isSgAsmx86Instruction(node);
            SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn);
            if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
                insert(std::make_pair(insn->get_address(), insn));
        }
    } insns;
    insns.traverse(interp, postorder);

    /* Each iteration of this loop handles one block and consumes at least one instruction from the map. */
    while (!insns.empty()) {
        std::cout <<"=====================================================================================\n"
                  <<"=== Starting a new basic block                                                    ===\n"
                  <<"=====================================================================================\n";
        /* The map is ordered by address, so begin() is the lowest-addressed instruction not yet processed. */
        AllInstructions::iterator si = insns.begin();
        SgAsmx86Instruction *insn = si->second;
        insns.erase(si);

        /* A fresh set of operators/dispatcher is created for every block. */
#if SEMANTIC_API == NEW_API
        BaseSemantics::RiscOperatorsPtr operators = make_ops();
        BaseSemantics::Formatter formatter;
        formatter.set_suppress_initial_values();
        BaseSemantics::DispatcherPtr dispatcher;
        if (do_trace) {
            /* Wrap the operators in a tracing layer that writes to stdout. */
            TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators);
            trace->set_stream(stdout);
            dispatcher = DispatcherX86::instance(trace);
        } else {
            dispatcher = DispatcherX86::instance(operators);
        }
        operators->set_solver(make_solver());
#else   // OLD_API
        typedef X86InstructionSemantics<MyPolicy, MyValueType> MyDispatcher;
        MyPolicy operators;
        MyDispatcher dispatcher(operators);
#   if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        operators.set_solver(make_solver());
        SymbolicSemantics::Formatter formatter;
        formatter.expr_formatter.do_rename = true;
        formatter.expr_formatter.add_renames = true;
#   elif SEMANTIC_DOMAIN != FINDCONST_DOMAIN && SEMANTIC_DOMAIN != FINDCONSTABI_DOMAIN
        BaseSemantics::Formatter formatter;
#   endif
#endif

#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API
        BaseSemantics::SValuePtr orig_esp;
        if (do_test_subst) {
            // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated
            // in the state, which is printed in the output, and thus changes the answer.
            BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large();
            orig_esp = operators->readRegister(*regdict->lookup("esp"));
            std::cout <<"Original state:\n" <<*operators;
        }
#endif

        /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of
         * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already
         * been processed. */
        while (1) {
            /* Analyze current instruction */
            std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n";
#if SEMANTIC_API == NEW_API
            /* Unlike the old-API handlers below, this handler only prints the exception and does not leave the loop. */
            try {
                dispatcher->processInstruction(insn);
#   if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/
                show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output
#   else
                std::cout <<(*operators + formatter);
#   endif
            } catch (const BaseSemantics::Exception &e) {
                std::cout <<e <<"\n";
            }
#else       // OLD API
            /* Any exception from the old API ends the current block. */
            try {
                dispatcher.processInstruction(insn);
#   if SEMANTIC_DOMAIN == FINDCONST_DOMAIN || SEMANTIC_DOMAIN == FINDCONSTABI_DOMAIN
                operators.print(std::cout);
#   else
                operators.print(std::cout, formatter);
#   endif
            } catch (const MyDispatcher::Exception &e) {
                std::cout <<e <<"\n";
                break;
#   if SEMANTIC_DOMAIN == PARTSYM_DOMAIN
            } catch (const MyPolicy::Exception &e) {
                std::cout <<e <<"\n";
                break;
#   endif
            } catch (const SMTSolver::Exception &e) {
                std::cout <<e <<" [ "<<unparseInstructionWithAddress(insn) <<"]\n";
                break;
            }
#endif

            /* Never follow CALL instructions */
            if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall)
                break;

            /* Get next instruction of this block */
            /* Every configuration below either breaks out (instruction pointer not a known constant) or defines
             * `next_addr` from the instruction pointer held in the semantic state. */
#if SEMANTIC_API == NEW_API
            BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip"));
            if (!ip->is_number())
                break;
            rose_addr_t next_addr = ip->get_number();
#else       // OLD_API
#   if SEMANTIC_DOMAIN == PARTSYM_DOMAIN || SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
            MyValueType<32> ip = operators.get_ip();
            if (!ip.is_known()) break;
            rose_addr_t next_addr = ip.known_value();
#   elif SEMANTIC_DOMAIN == NULL_DOMAIN || SEMANTIC_DOMAIN == INTERVAL_DOMAIN
            MyValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP);
            if (!ip.is_known()) break;
            rose_addr_t next_addr = ip.known_value();
#   elif SEMANTIC_DOMAIN == MULTI_DOMAIN
            PartialSymbolicSemantics::ValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP)
                                                         .get_subvalue(MyMultiSemanticsClass::SP0());
            if (!ip.is_known()) break;
            rose_addr_t next_addr = ip.known_value();
#   else
            if (operators.newIp->get().name) break;
            rose_addr_t next_addr = operators.newIp->get().offset;
#   endif
#endif
            /* Continue only if the next address is an instruction that has not been consumed yet; taking it out of
             * the map ensures it is processed at most once. */
            si = insns.find(next_addr);
            if (si==insns.end()) break;
            insn = si->second;
            insns.erase(si);
        }

        // Test substitution on the symbolic state.
#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API
        if (do_test_subst) {
            /* Replace the esp value read before the block with (new 32-bit undefined value commented "frame_pointer") + 4
             * and print the resulting state. */
            SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp);
            BaseSemantics::SValuePtr newvar = operators->undefined_(32);
            newvar->set_comment("frame_pointer");
            SymbolicSemantics::SValuePtr to =
                SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4)));
            std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n";
            SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to);
            std::cout <<"Substituted state:\n" <<(*operators+formatter);
        }
#endif
    }
}
Beispiel #29
0
 /* Traversal callback: stores every visited x86 instruction in `insns`, keyed
  * by its address. Nodes of any other kind are ignored. */
 void visit(SgNode *node) {
     SgAsmx86Instruction *x86Insn = isSgAsmx86Instruction(node);
     if (x86Insn == NULL)
         return;

     insns[x86Insn->get_address()] = x86Insn;
 }