Beispiel #1
0
std::string normalizeInstructionsToHTML(std::vector<SgAsmx86Instruction*>::iterator beg, 
    std::vector<SgAsmx86Instruction*>::iterator end)
{
    string normalizedUnparsedInstructions;
    map<SgAsmExpression*, size_t> valueNumbers[3];
    numberOperands( beg,end, valueNumbers);

    // Unparse the normalized forms of the instructions
    for (; beg != end; ++beg ) {
      SgAsmx86Instruction* insn = *beg;
      string mne = insn->get_mnemonic();
      boost::to_lower(mne);
      mne = "<font color=\"red\">" + htmlEscape(mne)+"</font>";

      normalizedUnparsedInstructions += mne;
      const SgAsmExpressionPtrList& operands = getOperands(insn);
      // Add to total for this variant
      // Add to total for each kind of operand
      size_t operandCount = operands.size();

      normalizedUnparsedInstructions += "<font color=\"blue\">";
      for (size_t i = 0; i < operandCount; ++i) {
        SgAsmExpression* operand = operands[i];
        ExpressionCategory cat = getCategory(operand);
        map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
        assert (numIter != valueNumbers[(int)cat].end());
        size_t num = numIter->second;

        normalizedUnparsedInstructions += (cat == ec_reg ? " R" : cat == ec_mem ? " M" : " V") + boost::lexical_cast<string>(num);
      }
      normalizedUnparsedInstructions += "; </font> <br> ";
  
    }
   
    return normalizedUnparsedInstructions;
};
Beispiel #2
0
bool createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId, size_t windowSize, size_t stride, sqlite3_connection& con) { // Ignores function boundaries
  bool retVal = false;
  vector<SgAsmx86Instruction*> insns;
  FindInstructionsVisitor vis;
  AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
  std::cout << "Number of instructions: " << insns.size() << std::endl;
  size_t insnCount = insns.size();

  for (size_t windowStart = 0;
       windowStart + windowSize <= insnCount;
       windowStart += stride) {
    static SignatureVector vec;
    vec.clear();
    hash_map<SgAsmExpression*, size_t> valueNumbers[3];
    numberOperands(&insns[windowStart], windowSize, valueNumbers);
    string normalizedUnparsedInstructions;
    // Unparse the normalized forms of the instructions
    for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
      SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
      size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      string mne = insn->get_mnemonic();
      boost::to_lower(mne);
      normalizedUnparsedInstructions += mne;
#endif
      const SgAsmExpressionPtrList& operands = getOperands(insn);
      size_t operandCount = operands.size();
      // Add to total for this variant
      ++vec.totalForVariant(var);
      // Add to total for each kind of operand
      for (size_t i = 0; i < operandCount; ++i) {
        SgAsmExpression* operand = operands[i];
        ExpressionCategory cat = getCategory(operand);
        ++vec.opsForVariant(cat, var);
        // Add to total for this unique operand number (for this window)
        hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
        assert (numIter != valueNumbers[(int)cat].end());
        size_t num = numIter->second;
        ++vec.specificOp(cat, num);
        // Add to total for this kind of operand
        ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
        normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") + boost::lexical_cast<string>(num);
#endif
      }

	  //Try to see what the effect is of jumps on the false positive rate
	  //uint64_t addr =0;
          /*
	  if( x86GetKnownBranchTarget(insn, addr) == true  )
	  {
		uint64_t insn_addr = insn->get_address();
		if( addr < insn_addr )
		  normalizedUnparsedInstructions += " UP ";
		else
		  normalizedUnparsedInstructions += " DOWN ";
	  }*/
	  
      // Add to total for this pair of operand kinds
      if (operandCount >= 2) {
        ExpressionCategory cat1 = getCategory(operands[0]);
        ExpressionCategory cat2 = getCategory(operands[1]);
        ++vec.operandPair(cat1, cat2);
      }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      if (insnNumber + 1 < windowSize) {
        normalizedUnparsedInstructions += ";";
      }
#endif
    }

#if 0
    // Print out this vector
    cout << "{";
    for (size_t i = 0; i < SignatureVector::Size; ++i) {
      if (i != 0) cout << ", ";
      cout << vec[i];
    }
    cout << "}\n";
#endif

    // cout << "Normalized instruction stream: " << normalizedUnparsedInstructions << endl;

    // Add vector to database
    addVectorToDatabase(con, vec, functionName, functionId, windowStart/stride, normalizedUnparsedInstructions, &insns[windowStart], filename, windowSize, stride);
	retVal = true;
  }
  addFunctionStatistics(con, filename, functionName, functionId, insnCount);
  return retVal;
}
// Ignores function boundaries
bool
createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId,
                                size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx)
{
    bool retVal = false;
    vector<SgAsmx86Instruction*> insns;
    FindInstructionsVisitor vis;
    AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
    size_t insnCount = insns.size();

    for (size_t windowStart = 0; windowStart + windowSize <= insnCount; windowStart += stride) {
        static SignatureVector vec;
        vec.clear();
        hash_map<SgAsmExpression*, size_t> valueNumbers[3];
        numberOperands(&insns[windowStart], windowSize, valueNumbers);
        string normalizedUnparsedInstructions;
        // Unparse the normalized forms of the instructions
        for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
            SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
            size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            string mne = insn->get_mnemonic();
            boost::to_lower(mne);
            normalizedUnparsedInstructions += mne;
#endif
            const SgAsmExpressionPtrList& operands = getOperands(insn);
            size_t operandCount = operands.size();
            // Add to total for this variant
            ++vec.totalForVariant(var);
            // Add to total for each kind of operand
            for (size_t i = 0; i < operandCount; ++i) {
                SgAsmExpression* operand = operands[i];
                ExpressionCategory cat = getCategory(operand);
                ++vec.opsForVariant(cat, var);
                // Add to total for this unique operand number (for this window)
                hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
                assert (numIter != valueNumbers[(int)cat].end());
                size_t num = numIter->second;
                ++vec.specificOp(cat, num);
                // Add to total for this kind of operand
                ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
                normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") +
                                                  boost::lexical_cast<string>(num);
#endif
            }

            // Add to total for this pair of operand kinds
            if (operandCount >= 2) {
                ExpressionCategory cat1 = getCategory(operands[0]);
                ExpressionCategory cat2 = getCategory(operands[1]);
                ++vec.operandPair(cat1, cat2);
            }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            if (insnNumber + 1 < windowSize) {
                normalizedUnparsedInstructions += ";";
            }
#endif
        }

        // Add vector to database
        addVectorToDatabase(tx, vec, functionName, functionId, windowStart/stride, normalizedUnparsedInstructions,
                            &insns[windowStart], filename, windowSize, stride);
	retVal = true;
    }
    addFunctionStatistics(tx, filename, functionName, functionId, insnCount);
    return retVal;
}
void
RoseBin_FlowAnalysis::process_jumps() {
    if (RoseBin_support::DEBUG_MODE())
      cerr << "\n >>>>>>>>> processing jumps ... " << endl;
  rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it;
  for (it=rememberInstructions.begin();it!=rememberInstructions.end();++it) {
    SgAsmx86Instruction* inst = isSgAsmx86Instruction(it->second);
    if (inst->get_kind() == x86_call) {
      //cerr << "Found call at " << std::hex << inst->get_address() << endl;
      SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst));
      if (target) {
        //cerr << "Target is " << std::hex << target->get_address() << endl;
        // inst->get_targets().push_back(target);
        // we set the sources (for each node)
        ROSE_ASSERT(g_algo->info);
        g_algo->info->incomingEdges[target].insert(inst->get_address());
        // tps: changed this algorithm so that it runs in
        // linear time!
        ROSE_ASSERT (target->get_parent());
        if (target->get_parent()) {
          // ROSE_ASSERT(target->get_parent());
          SgAsmNode* b_b = target;
          if (!db)
            b_b = isSgAsmNode(target->get_parent());
          ROSE_ASSERT(b_b);
          SgAsmFunction* b_func = isSgAsmFunction(b_b->get_parent());

          if (b_func) {
            // (16/Oct/07) tps: this is tricky, it appears that sometimes the target can
            // be just a jmp to a new location, so we should forward this information to the correct
            // function.
            // Therefore we need to check if the current function has a return statement.
            // If not, we want to forward this information.
            if (target->get_kind() == x86_jmp) {
              //cerr << " >>>>>>>> found a jmp target - number of children: " << b_func->get_traversalSuccessorContainer().size() << endl;
              if (b_func->get_numberOfTraversalSuccessors()==1) {
                SgAsmx86Instruction* target2 = isSgAsmx86Instruction(process_jumps_get_target(inst));
                if (target2) {
                  b_b = target2;
                  if (!db)
                    b_b = isSgAsmNode(target2->get_parent());
                  b_func = isSgAsmFunction(b_b->get_parent());
                }
              }
            }


            if (inst->get_parent()) {
              //cerr << "Inst has a parent" << endl;
              if (inst->get_comment()=="")
                inst->set_comment(""+b_func->get_name());
              ROSE_ASSERT(g_algo->info);
              SgAsmInstruction* inst_after = g_algo->info->getInstructionAtAddress(inst->get_address() + inst->get_raw_bytes().size()); // inst->cfgBinFlowOutEdge(info);
              if (inst_after) {
                //cerr << "Added dest " << std::hex << isSgAsmStatement(inst_after)->get_address() << " for function" << endl;
                b_func->append_dest(isSgAsmStatement(inst_after));
              }
            }
          } else {
            if (RoseBin_support::DEBUG_MODE())
            cerr << " NO FUNCTION DETECTED ABOVE BLOCK . " << endl;
          }

        } else {
          if (RoseBin_support::DEBUG_MODE())
            cerr << "   WARNING :: process_jumps: target has no parent ... i.e. no FunctionDeclaration to it " <<
            target->class_name() << endl;
        }
      } else {
        if (inst)
          if (RoseBin_support::DEBUG_MODE())
            cerr << "    WARNING :: process_jumps: No target found for node " << RoseBin_support::HexToString(inst->get_address())
                 << "   " << inst->get_mnemonic() << endl;
      }
    } else {

      // might be a jmp
      SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst));
      if (target) {
        // inst->get_targets().push_back(target);
        // we set the sources (for each node)
        ROSE_ASSERT(g_algo->info);
        g_algo->info->incomingEdges[target].insert(inst->get_address());
      }
    }
  }
  //cerr << "\n >>>>>>>>> processing jumps ... done. " << endl;

  //  cerr << "\n >>>>>>>>> resolving RET jumps ... " << endl;
  rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it2;
  for (it2=rememberInstructions.begin();it2!=rememberInstructions.end();++it2) {
    //int id = it2->first;
    SgAsmx86Instruction* target = isSgAsmx86Instruction(it2->second);
    ROSE_ASSERT (target);
#if 1
    if (target->get_kind() == x86_ret) {
      SgAsmNode* b_b = target;
      if (!db)
        b_b = isSgAsmNode(target->get_parent());
      SgAsmFunction* parent = isSgAsmFunction(b_b->get_parent());
      if (parent) {
        //ROSE_ASSERT(parent);
        std::vector <SgAsmStatement*> dest_list = parent->get_dest();
        for (size_t i = 0; i < dest_list.size(); ++i) {
          ROSE_ASSERT (isSgAsmInstruction(dest_list[i]));
          //cerr << "Adding ret target " << std::hex << dest_list[i]->get_address() << " to " << std::hex << target->get_address() << endl;
          //info->indirectJumpAndReturnTargets[target].insert(dest_list[i]->get_address());
          ROSE_ASSERT(g_algo->info);
          g_algo->info->incomingEdges[isSgAsmInstruction(dest_list[i])].insert(target->get_address());
        }

        std::vector <SgAsmStatement*>::iterator it3 = dest_list.begin();
        for (; it3!=dest_list.end();++it3) {
          SgAsmInstruction* dest = isSgAsmInstruction(*it3);
          if (dest) {
            dest->append_sources(target);
            //cerr << " appending source to " << dest->get_address() << "   target: " << target->get_address() << endl;
          }
        } // for
      } else { // if parent
        if (RoseBin_support::DEBUG_MODE())
          cerr << "   ERROR :: RET jumps :: no parent found for ret : " << target->class_name() << endl;
        //exit (0);
      }
    } // if ret
#endif
  }
  if (RoseBin_support::DEBUG_MODE())
    cerr << " >>>>>>>>> resolving RET jumps ... done." << endl;
}