Пример #1
0
/// Builds one signature vector per sliding window of instructions found under `top` and hands each one to
/// addVectorToDatabase().
///
/// All instructions collected from the subtree form one flat sequence; function boundaries are ignored. Window
/// number k covers the instructions [k*stride, k*stride + windowSize). After all windows have been processed the
/// total instruction count is passed to addFunctionStatistics(), even when no window was generated.
///
/// @param top          Root of the subtree that is searched for instructions.
/// @param filename     Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @param functionName Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @param functionId   Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @param windowSize   Number of consecutive instructions per window. A value of 0, or a value larger than the
///                     number of instructions found, yields no windows.
/// @param stride       Distance, in instructions, between the starts of consecutive windows. A value of 0 yields
///                     no windows (it could never advance and would divide by zero when numbering the window).
/// @param tx           Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @return true when at least one window was handed to addVectorToDatabase(), false otherwise.
bool
createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId,
                                size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx)
{
    bool retVal = false;
    vector<SgAsmx86Instruction*> insns;
    FindInstructionsVisitor vis;
    AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
    const size_t insnCount = insns.size();

    // Work out the number of windows up front. Degenerate arguments (zero stride, zero window size, window larger
    // than the instruction list) produce zero windows instead of an endless loop, a division by zero or an
    // out-of-range index into insns. The subtraction cannot wrap because windowSize <= insnCount is checked first.
    size_t windowCount = 0;
    if (stride > 0 && windowSize > 0 && windowSize <= insnCount)
        windowCount = (insnCount - windowSize) / stride + 1;

    for (size_t windowIndex = 0; windowIndex < windowCount; ++windowIndex) {
        const size_t windowStart = windowIndex * stride;

        // NOTE(review): function-local static, so a single object is reused (and cleared) for every window and
        // every call; this makes the function non-reentrant.
        static SignatureVector vec;
        vec.clear();

        // One operand-number table per expression category, filled in for this window only.
        hash_map<SgAsmExpression*, size_t> valueNumbers[3];
        numberOperands(&insns[windowStart], windowSize, valueNumbers);

        // Stays empty unless NORMALIZED_UNPARSED_INSTRUCTIONS is defined.
        string normalizedUnparsedInstructions;

        for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
            SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
            const size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            string mne = insn->get_mnemonic();
            boost::to_lower(mne);
            normalizedUnparsedInstructions += mne;
#endif
            const SgAsmExpressionPtrList& operands = getOperands(insn);
            const size_t operandCount = operands.size();

            // Count the instruction itself under its kind.
            ++vec.totalForVariant(var);

            for (size_t i = 0; i < operandCount; ++i) {
                SgAsmExpression* operand = operands[i];
                const ExpressionCategory cat = getCategory(operand);

                // Count the operand under (category, instruction kind).
                ++vec.opsForVariant(cat, var);

                // Count the operand under its window-local number; numberOperands() must have seen it.
                const hash_map<SgAsmExpression*, size_t>& numbersForCat = valueNumbers[static_cast<int>(cat)];
                hash_map<SgAsmExpression*, size_t>::const_iterator numIter = numbersForCat.find(operand);
                assert (numIter != numbersForCat.end());
                const size_t num = numIter->second;
                ++vec.specificOp(cat, num);

                // Count the operand under its category alone.
                ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
                normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") +
                                                  boost::lexical_cast<string>(num);
#endif
            }

            // Count the category pair formed by the first two operands.
            if (operandCount >= 2) {
                const ExpressionCategory cat1 = getCategory(operands[0]);
                const ExpressionCategory cat2 = getCategory(operands[1]);
                ++vec.operandPair(cat1, cat2);
            }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            // Separate instructions with ';' but do not append one after the last instruction of the window.
            if (insnNumber + 1 < windowSize) {
                normalizedUnparsedInstructions += ";";
            }
#endif
        }

        // Store the finished vector; windowIndex equals windowStart / stride.
        addVectorToDatabase(tx, vec, functionName, functionId, windowIndex, normalizedUnparsedInstructions,
                            &insns[windowStart], filename, windowSize, stride);
        retVal = true;
    }

    addFunctionStatistics(tx, filename, functionName, functionId, insnCount);
    return retVal;
}
Пример #2
0
/// Builds one signature vector per sliding window of instructions found under `top` and hands each one to
/// addVectorToDatabase().
///
/// All instructions collected from the subtree form one flat sequence; function boundaries are ignored. Window
/// number k covers the instructions [k*stride, k*stride + windowSize). The number of instructions found is printed
/// to std::cout. After all windows have been processed the total instruction count is passed to
/// addFunctionStatistics(), even when no window was generated.
///
/// @param top          Root of the subtree that is searched for instructions.
/// @param filename     Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @param functionName Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @param functionId   Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @param windowSize   Number of consecutive instructions per window. A value of 0, or a value larger than the
///                     number of instructions found, yields no windows.
/// @param stride       Distance, in instructions, between the starts of consecutive windows. A value of 0 yields
///                     no windows (it could never advance and would divide by zero when numbering the window).
/// @param con          Forwarded unchanged to addVectorToDatabase() and addFunctionStatistics().
/// @return true when at least one window was handed to addVectorToDatabase(), false otherwise.
bool createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId, size_t windowSize, size_t stride, sqlite3_connection& con) {
  bool retVal = false;
  vector<SgAsmx86Instruction*> insns;
  FindInstructionsVisitor vis;
  AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
  std::cout << "Number of instructions: " << insns.size() << std::endl;
  const size_t insnCount = insns.size();

  // Work out the number of windows up front. Degenerate arguments (zero stride, zero window size, window larger
  // than the instruction list) produce zero windows instead of an endless loop, a division by zero or an
  // out-of-range index into insns. The subtraction cannot wrap because windowSize <= insnCount is checked first.
  size_t windowCount = 0;
  if (stride > 0 && windowSize > 0 && windowSize <= insnCount)
    windowCount = (insnCount - windowSize) / stride + 1;

  for (size_t windowIndex = 0; windowIndex < windowCount; ++windowIndex) {
    const size_t windowStart = windowIndex * stride;

    // NOTE(review): function-local static, so a single object is reused (and cleared) for every window and every
    // call; this makes the function non-reentrant.
    static SignatureVector vec;
    vec.clear();

    // One operand-number table per expression category, filled in for this window only.
    hash_map<SgAsmExpression*, size_t> valueNumbers[3];
    numberOperands(&insns[windowStart], windowSize, valueNumbers);

    // Stays empty unless NORMALIZED_UNPARSED_INSTRUCTIONS is defined.
    string normalizedUnparsedInstructions;

    for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
      SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
      const size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      string mne = insn->get_mnemonic();
      boost::to_lower(mne);
      normalizedUnparsedInstructions += mne;
#endif
      const SgAsmExpressionPtrList& operands = getOperands(insn);
      const size_t operandCount = operands.size();

      // Count the instruction itself under its kind.
      ++vec.totalForVariant(var);

      for (size_t i = 0; i < operandCount; ++i) {
        SgAsmExpression* operand = operands[i];
        const ExpressionCategory cat = getCategory(operand);

        // Count the operand under (category, instruction kind).
        ++vec.opsForVariant(cat, var);

        // Count the operand under its window-local number; numberOperands() must have seen it.
        const hash_map<SgAsmExpression*, size_t>& numbersForCat = valueNumbers[static_cast<int>(cat)];
        hash_map<SgAsmExpression*, size_t>::const_iterator numIter = numbersForCat.find(operand);
        assert (numIter != numbersForCat.end());
        const size_t num = numIter->second;
        ++vec.specificOp(cat, num);

        // Count the operand under its category alone.
        ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
        normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") + boost::lexical_cast<string>(num);
#endif
      }

      // Disabled experiment: try to see what the effect of jumps is on the false positive rate.
      //uint64_t addr =0;
      /*
      if( x86GetKnownBranchTarget(insn, addr) == true  )
      {
        uint64_t insn_addr = insn->get_address();
        if( addr < insn_addr )
          normalizedUnparsedInstructions += " UP ";
        else
          normalizedUnparsedInstructions += " DOWN ";
      }*/

      // Count the category pair formed by the first two operands.
      if (operandCount >= 2) {
        const ExpressionCategory cat1 = getCategory(operands[0]);
        const ExpressionCategory cat2 = getCategory(operands[1]);
        ++vec.operandPair(cat1, cat2);
      }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      // Separate instructions with ';' but do not append one after the last instruction of the window.
      if (insnNumber + 1 < windowSize) {
        normalizedUnparsedInstructions += ";";
      }
#endif
    }

#if 0
    // Print out this vector
    cout << "{";
    for (size_t i = 0; i < SignatureVector::Size; ++i) {
      if (i != 0) cout << ", ";
      cout << vec[i];
    }
    cout << "}\n";
#endif

    // cout << "Normalized instruction stream: " << normalizedUnparsedInstructions << endl;

    // Store the finished vector; windowIndex equals windowStart / stride.
    addVectorToDatabase(con, vec, functionName, functionId, windowIndex, normalizedUnparsedInstructions, &insns[windowStart], filename, windowSize, stride);
    retVal = true;
  }

  addFunctionStatistics(con, filename, functionName, functionId, insnCount);
  return retVal;
}