Exemplo n.º 1
0
// Inserts one signature vector as a row of the `vectors` table and increments numVectorsGenerated.
//
// Values written, in column order:
//   function_id            functionId (narrowed to int)
//   index_within_function  indexWithinFunction (narrowed to int)
//   line                   address of firstInsn[0], as text
//   offset                 address of firstInsn[windowSize - 1], as text
//   sum_of_counts          sum of all SignatureVector::Size counters of vec, as text
//   counts                 output of compressVector() for the counters of vec
//   instr_seq              16-byte MD5 digest of normalizedUnparsedInstructions
//
// functionName, filename and stride are accepted but not used by this function.
// firstInsn must hold at least windowSize entries and windowSize must be non-zero, because the last
// entry of the window is dereferenced without a check.
void addVectorToDatabase(sqlite3_connection& con, const SignatureVector& vec, const std::string& functionName, size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions, SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride) {
  ++numVectorsGenerated;

  vector<uint8_t> packedCounts = compressVector(vec.getBase(), SignatureVector::Size);

  // Total of every counter in the vector
  size_t countTotal = 0;
  size_t slot = 0;
  while (slot < SignatureVector::Size) {
    countTotal += vec[slot];
    ++slot;
  }

  // Addresses of the first and the last instruction of the window, converted to text
  string firstAddress = boost::lexical_cast<string>(isSgAsmStatement(firstInsn[0])->get_address());
  string lastAddress = boost::lexical_cast<string>(isSgAsmStatement(firstInsn[windowSize - 1])->get_address());

  // Digest of the normalized instruction text
  unsigned char digest[16];
  MD5(reinterpret_cast<const unsigned char*>(normalizedUnparsedInstructions.data()),
      normalizedUnparsedInstructions.size(),
      digest);

  sqlite3_command insertCmd(con, "INSERT INTO vectors( function_id,  index_within_function, line, offset, sum_of_counts, counts, instr_seq ) VALUES(?,?,?,?,?,?,?)");
  insertCmd.bind(1, static_cast<int>(functionId));
  insertCmd.bind(2, static_cast<int>(indexWithinFunction));
  insertCmd.bind(3, firstAddress);
  insertCmd.bind(4, lastAddress);
  insertCmd.bind(5, boost::lexical_cast<string>(countTotal));
  insertCmd.bind(6, &packedCounts[0], packedCounts.size());
  insertCmd.bind(7, digest, 16);

  insertCmd.executenonquery();
}
Exemplo n.º 2
0
 // Convert the arguments to the expected signature when needed, then call
 // setValues(args, context, streamContext).
 //
 // - If the signature of `in` already equals expectedSignature, `in` is
 //   forwarded unchanged.
 // - If expectedSignature is "m", the arguments are wrapped in a tuple held by
 //   an AnyValue and encoded into the message buffer.
 // - Otherwise expectedSignature must be a tuple with as many children as
 //   there are arguments; every argument whose signature differs is converted
 //   to the expected type first.
 //
 // Throws std::runtime_error when expectedSignature is not a tuple, when the
 // number of arguments does not match, when no type can be obtained for an
 // expected child signature, or when an argument cannot be converted.
 // Converted arguments flagged as allocated by convert() are destroyed before
 // this function returns or propagates an exception.
 void Message::setValues(const std::vector<qi::AnyReference>& in, const qi::Signature& expectedSignature, ObjectHost* context, StreamContext* streamContext) {
   qi::Signature argsSig = qi::makeTupleSignature(in, false);
   if (expectedSignature == argsSig) {
     setValues(in, context, streamContext);
     return;
   }
   if (expectedSignature == "m")
   {
     /* We need to send a dynamic containing the value tuple to push the
      * signature. This wraps correctly without copying the data.
      */
     std::vector<qi::TypeInterface*> types;
     std::vector<void*> values;
     types.resize(in.size());
     values.resize(in.size());
     for (unsigned i=0; i<in.size(); ++i)
     {
       types[i] = in[i].type();
       values[i] = in[i].rawValue();
     }
     AnyReference tuple = makeGenericTuplePtr(types, values);
     AnyValue val(tuple, false, false);
     encodeBinary(&_p->buffer, AnyReference::from(val), boost::bind(serializeObject, _1, context), streamContext);
     return;
   }
   /* This check does not make sense for this transport layer, which does not
    * care, but it checks a general rule that is true for all the messages we
    * use and it can help catch many mistakes.
    */
   if (expectedSignature.type() != Signature::Type_Tuple)
     throw std::runtime_error("Expected a tuple, got " + expectedSignature.toString());
   AnyReferenceVector nargs(in);
   SignatureVector src = argsSig.children();
   SignatureVector dst = expectedSignature.children();
   if (src.size() != dst.size())
     throw std::runtime_error("remote call: signature size mismatch");
   SignatureVector::iterator its = src.begin(), itd = dst.begin();
   // allocated[i] is set when nargs[i] was replaced by a converted value that
   // convert() reported as allocated; those must be destroyed by us.
   boost::dynamic_bitset<> allocated(nargs.size());
   try
   {
     for (unsigned i = 0; i< nargs.size(); ++i, ++its, ++itd)
     {
       if (*its != *itd)
       {
         ::qi::TypeInterface* target = ::qi::TypeInterface::fromSignature(*itd);
         if (!target)
           throw std::runtime_error("remote call: Failed to obtain a type from signature " + (*itd).toString());
         std::pair<AnyReference, bool> c = nargs[i].convert(target);
         if (!c.first.type())
         {
           throw std::runtime_error(
                 _QI_LOG_FORMAT("remote call: failed to convert argument %s from %s to %s", i, (*its).toString(), (*itd).toString()));
         }
         nargs[i] = c.first;
         allocated[i] = c.second;
       }
     }
     setValues(nargs, context, streamContext);
   }
   catch (...)
   {
     // A later conversion or the encoding failed: release the conversions
     // made so far before propagating, otherwise they would leak.
     for (unsigned j = 0; j< nargs.size(); ++j)
       if (allocated[j])
         nargs[j].destroy();
     throw;
   }
   for (unsigned i = 0; i< nargs.size(); ++i)
     if (allocated[i])
       nargs[i].destroy();
 }
Exemplo n.º 3
0
// Stores one signature vector as a new row of the "vectors" table through transaction tx and increments
// numVectorsGenerated.
//
// The row id is one more than the largest id currently in the table (1 when the table is empty). The other
// columns receive functionId, indexWithinFunction, the address of the first instruction of the window, the
// address of the last instruction of the window, the size of the ExtentMap built from the address/size of
// every instruction in the window, the sum of all counters of vec, the base64 text of the compressed
// counters, and the base64 text of the MD5 digest of normalizedUnparsedInstructions.
//
// functionName, filename and stride are accepted but not used by this function. firstInsn must hold at
// least windowSize entries and windowSize must be non-zero, because firstInsn[windowSize-1] is dereferenced
// without a check.
void
addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec, const std::string& functionName,
                    size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions,
                    SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    vector<uint8_t> packedCounts = compressVector(vec.getBase(), SignatureVector::Size);

    // Total of every counter in the vector.
    size_t countTotal = 0;
    size_t slot = 0;
    while (slot < SignatureVector::Size) {
        countTotal += vec[slot];
        ++slot;
    }

    // Collect the address/size extent of each instruction in the window.
    ExtentMap covered;
    for (SgAsmx86Instruction* const* insnPtr = firstInsn; insnPtr != firstInsn + windowSize; ++insnPtr)
        covered.insert(Extent((*insnPtr)->get_address(), (*insnPtr)->get_size()));

    // Digest of the normalized instruction text.
    unsigned char digest[16];
    MD5(reinterpret_cast<const unsigned char*>(normalizedUnparsedInstructions.data()),
        normalizedUnparsedInstructions.size(),
        digest);

    // Placeholders are bound by zero-based position, in the order the columns are listed.
    SqlDatabase::StatementPtr insertStmt =
        tx->statement("insert into vectors"
                      " (id, function_id, index_within_function, line, last_insn_va, size,"
                      "sum_of_counts, counts_b64, instr_seq_b64)"
                      " values (?,?,?,?,?,?,?,?,?)");

    // Next row id, 1-origin.
    int nextId = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int();

    insertStmt->bind(0, nextId);                                                                  // id
    insertStmt->bind(1, functionId);                                                              // function_id
    insertStmt->bind(2, indexWithinFunction);                                                     // index_within_function
    insertStmt->bind(3, firstInsn[0]->get_address());                                             // line
    insertStmt->bind(4, firstInsn[windowSize-1]->get_address());                                  // last_insn_va
    insertStmt->bind(5, covered.size());                                                          // size
    insertStmt->bind(6, countTotal);                                                              // sum_of_counts
    insertStmt->bind(7, StringUtility::encode_base64(&packedCounts[0], packedCounts.size()));     // counts_b64
    insertStmt->bind(8, StringUtility::encode_base64(digest, 16));                                // instr_seq_b64
    insertStmt->execute();
}
Exemplo n.º 4
0
// Builds one signature vector for every window of windowSize consecutive instructions found under `top`
// (windows start every `stride` instructions) and stores each vector with addVectorToDatabase(). Function
// statistics are then recorded with addFunctionStatistics(). Ignores function boundaries.
//
// Parameters:
//   top           root of the subtree that is searched for instructions
//   filename      forwarded to addVectorToDatabase() and addFunctionStatistics()
//   functionName  forwarded to addVectorToDatabase() and addFunctionStatistics()
//   functionId    forwarded to addVectorToDatabase() and addFunctionStatistics()
//   windowSize    number of instructions per window
//   stride        distance, in instructions, between the starts of consecutive windows
//   tx            transaction used for all database writes
//
// Returns true if at least one vector was stored. A windowSize or stride of zero, or fewer than windowSize
// instructions, yields no windows: only the function statistics are recorded and false is returned.
bool
createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId,
                                size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx)
{
    bool retVal = false;
    vector<SgAsmx86Instruction*> insns;
    FindInstructionsVisitor vis;
    AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
    size_t insnCount = insns.size();

    // Number of complete windows. Computed up front so that a zero stride cannot divide by zero or loop
    // forever, a zero windowSize cannot index before the start of a window, and the window bound cannot
    // wrap around.
    size_t windowCount = 0;
    if (windowSize > 0 && stride > 0 && windowSize <= insnCount)
        windowCount = (insnCount - windowSize) / stride + 1;

    for (size_t windowIndex = 0; windowIndex < windowCount; ++windowIndex) {
        const size_t windowStart = windowIndex * stride;

        // Reused across windows and calls; cleared before each window.
        static SignatureVector vec;
        vec.clear();
        hash_map<SgAsmExpression*, size_t> valueNumbers[3];
        numberOperands(&insns[windowStart], windowSize, valueNumbers);
        string normalizedUnparsedInstructions;
        // Unparse the normalized forms of the instructions
        for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
            SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
            size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            string mne = insn->get_mnemonic();
            boost::to_lower(mne);
            normalizedUnparsedInstructions += mne;
#endif
            const SgAsmExpressionPtrList& operands = getOperands(insn);
            size_t operandCount = operands.size();
            // Add to total for this variant
            ++vec.totalForVariant(var);
            // Add to total for each kind of operand
            for (size_t i = 0; i < operandCount; ++i) {
                SgAsmExpression* operand = operands[i];
                ExpressionCategory cat = getCategory(operand);
                ++vec.opsForVariant(cat, var);
                // Add to total for this unique operand number (for this window)
                hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
                assert (numIter != valueNumbers[(int)cat].end());
                size_t num = numIter->second;
                ++vec.specificOp(cat, num);
                // Add to total for this kind of operand
                ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
                normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") +
                                                  boost::lexical_cast<string>(num);
#endif
            }

            // Add to total for this pair of operand kinds
            if (operandCount >= 2) {
                ExpressionCategory cat1 = getCategory(operands[0]);
                ExpressionCategory cat2 = getCategory(operands[1]);
                ++vec.operandPair(cat1, cat2);
            }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
            // Separate instructions with ';', except after the last one of the window
            if (insnNumber + 1 < windowSize) {
                normalizedUnparsedInstructions += ";";
            }
#endif
        }

        // Add vector to database; windowIndex equals windowStart/stride
        addVectorToDatabase(tx, vec, functionName, functionId, windowIndex, normalizedUnparsedInstructions,
                            &insns[windowStart], filename, windowSize, stride);
        retVal = true;
    }
    addFunctionStatistics(tx, filename, functionName, functionId, insnCount);
    return retVal;
}
Exemplo n.º 5
0
// Builds one signature vector for every window of windowSize consecutive instructions found under `top`
// (windows start every `stride` instructions) and stores each vector with addVectorToDatabase(). Function
// statistics are then recorded with addFunctionStatistics(). Ignores function boundaries.
//
// The number of instructions found is printed to std::cout.
//
// Returns true if at least one vector was stored. A windowSize or stride of zero, or fewer than windowSize
// instructions, yields no windows: only the function statistics are recorded and false is returned.
bool createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId, size_t windowSize, size_t stride, sqlite3_connection& con) {
  bool retVal = false;
  vector<SgAsmx86Instruction*> insns;
  FindInstructionsVisitor vis;
  AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns ));
  std::cout << "Number of instructions: " << insns.size() << std::endl;
  size_t insnCount = insns.size();

  // Number of complete windows. Computed up front so that a zero stride cannot divide by zero or loop
  // forever, a zero windowSize cannot index before the start of a window, and the window bound cannot
  // wrap around.
  size_t windowCount = 0;
  if (windowSize > 0 && stride > 0 && windowSize <= insnCount) {
    windowCount = (insnCount - windowSize) / stride + 1;
  }

  for (size_t windowIndex = 0; windowIndex < windowCount; ++windowIndex) {
    const size_t windowStart = windowIndex * stride;

    // Reused across windows and calls; cleared before each window.
    static SignatureVector vec;
    vec.clear();
    hash_map<SgAsmExpression*, size_t> valueNumbers[3];
    numberOperands(&insns[windowStart], windowSize, valueNumbers);
    string normalizedUnparsedInstructions;
    // Unparse the normalized forms of the instructions
    for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) {
      SgAsmx86Instruction* insn = insns[windowStart + insnNumber];
      size_t var = getInstructionKind(insn);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      string mne = insn->get_mnemonic();
      boost::to_lower(mne);
      normalizedUnparsedInstructions += mne;
#endif
      const SgAsmExpressionPtrList& operands = getOperands(insn);
      size_t operandCount = operands.size();
      // Add to total for this variant
      ++vec.totalForVariant(var);
      // Add to total for each kind of operand
      for (size_t i = 0; i < operandCount; ++i) {
        SgAsmExpression* operand = operands[i];
        ExpressionCategory cat = getCategory(operand);
        ++vec.opsForVariant(cat, var);
        // Add to total for this unique operand number (for this window)
        hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand);
        assert (numIter != valueNumbers[(int)cat].end());
        size_t num = numIter->second;
        ++vec.specificOp(cat, num);
        // Add to total for this kind of operand
        ++vec.operandTotal(cat);
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
        normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? "M" : "V") + boost::lexical_cast<string>(num);
#endif
      }

      // Disabled experiment: try to see what the effect is of jumps on the false positive rate
      //uint64_t addr =0;
      /*
      if( x86GetKnownBranchTarget(insn, addr) == true  )
      {
        uint64_t insn_addr = insn->get_address();
        if( addr < insn_addr )
          normalizedUnparsedInstructions += " UP ";
        else
          normalizedUnparsedInstructions += " DOWN ";
      }*/

      // Add to total for this pair of operand kinds
      if (operandCount >= 2) {
        ExpressionCategory cat1 = getCategory(operands[0]);
        ExpressionCategory cat2 = getCategory(operands[1]);
        ++vec.operandPair(cat1, cat2);
      }
#ifdef NORMALIZED_UNPARSED_INSTRUCTIONS
      // Separate instructions with ';', except after the last one of the window
      if (insnNumber + 1 < windowSize) {
        normalizedUnparsedInstructions += ";";
      }
#endif
    }

#if 0
    // Print out this vector
    cout << "{";
    for (size_t i = 0; i < SignatureVector::Size; ++i) {
      if (i != 0) cout << ", ";
      cout << vec[i];
    }
    cout << "}\n";
#endif

    // cout << "Normalized instruction stream: " << normalizedUnparsedInstructions << endl;

    // Add vector to database; windowIndex equals windowStart/stride
    addVectorToDatabase(con, vec, functionName, functionId, windowIndex, normalizedUnparsedInstructions, &insns[windowStart], filename, windowSize, stride);
    retVal = true;
  }
  addFunctionStatistics(con, filename, functionName, functionId, insnCount);
  return retVal;
}