void addVectorToDatabase(sqlite3_connection& con, const SignatureVector& vec, const std::string& functionName, size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions, SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride) { ++numVectorsGenerated; vector<uint8_t> compressedCounts = compressVector(vec.getBase(), SignatureVector::Size); size_t vectorSum = 0; for (size_t i = 0; i < SignatureVector::Size; ++i) { vectorSum += vec[i]; } string db_select_n = "INSERT INTO vectors( function_id, index_within_function, line, offset, sum_of_counts, counts, instr_seq ) VALUES(?,?,?,?,?,?,?)"; string line = boost::lexical_cast<string>(isSgAsmStatement(firstInsn[0])->get_address()); string offset = boost::lexical_cast<string>(isSgAsmStatement(firstInsn[windowSize - 1])->get_address()); unsigned char md[16]; //calculate_md5_of( (const unsigned char*) normalizedUnparsedInstructions.data() , normalizedUnparsedInstructions.size(), md ) ; MD5( (const unsigned char*) normalizedUnparsedInstructions.data() , normalizedUnparsedInstructions.size(), md ) ; sqlite3_command cmd(con, db_select_n.c_str()); cmd.bind(1, (int)functionId ); cmd.bind(2, (int)indexWithinFunction ); cmd.bind(3, line); cmd.bind(4, offset); cmd.bind(5, boost::lexical_cast<string>(vectorSum)); cmd.bind(6, &compressedCounts[0], compressedCounts.size()); cmd.bind(7, md,16); //cmd.bind(7, ""); cmd.executenonquery(); }
//convert args then call setValues void Message::setValues(const std::vector<qi::AnyReference>& in, const qi::Signature& expectedSignature, ObjectHost* context, StreamContext* streamContext) { qi::Signature argsSig = qi::makeTupleSignature(in, false); if (expectedSignature == argsSig) { setValues(in, context, streamContext); return; } if (expectedSignature == "m") { /* We need to send a dynamic containing the value tuple to push the * signature. This wraps correctly without copying the data. */ std::vector<qi::TypeInterface*> types; std::vector<void*> values; types.resize(in.size()); values.resize(in.size()); for (unsigned i=0; i<in.size(); ++i) { types[i] = in[i].type(); values[i] = in[i].rawValue(); } AnyReference tuple = makeGenericTuplePtr(types, values); AnyValue val(tuple, false, false); encodeBinary(&_p->buffer, AnyReference::from(val), boost::bind(serializeObject, _1, context), streamContext); return; } /* This check does not makes sense for this transport layer who does not care, * But it checks a general rule that is true for all the messages we use and * it can help catch many mistakes. 
*/ if (expectedSignature.type() != Signature::Type_Tuple) throw std::runtime_error("Expected a tuple, got " + expectedSignature.toString()); AnyReferenceVector nargs(in); SignatureVector src = argsSig.children(); SignatureVector dst = expectedSignature.children(); if (src.size() != dst.size()) throw std::runtime_error("remote call: signature size mismatch"); SignatureVector::iterator its = src.begin(), itd = dst.begin(); boost::dynamic_bitset<> allocated(nargs.size()); for (unsigned i = 0; i< nargs.size(); ++i, ++its, ++itd) { if (*its != *itd) { ::qi::TypeInterface* target = ::qi::TypeInterface::fromSignature(*itd); if (!target) throw std::runtime_error("remote call: Failed to obtain a type from signature " + (*itd).toString()); std::pair<AnyReference, bool> c = nargs[i].convert(target); if (!c.first.type()) { throw std::runtime_error( _QI_LOG_FORMAT("remote call: failed to convert argument %s from %s to %s", i, (*its).toString(), (*itd).toString())); } nargs[i] = c.first; allocated[i] = c.second; } } setValues(nargs, context, streamContext); for (unsigned i = 0; i< nargs.size(); ++i) if (allocated[i]) nargs[i].destroy(); }
/**
 * Inserts one signature vector as a row of the `vectors` table inside the given transaction.
 *
 * The row id is computed as max(id)+1 over the existing rows (1 when the table is empty).
 * Besides the id, the row holds: the function id, the index of the window within the function,
 * the address of the first and of the last instruction of the window, the size of the extent
 * map built from every instruction's (address, size), the sum of all counters, and the
 * base64 encodings of the compressVector() output and of the 16-byte MD5 digest of
 * normalizedUnparsedInstructions.
 *
 * @param tx                              transaction used to prepare and run both statements
 * @param vec                             vector whose SignatureVector::Size counters are stored
 * @param functionName                    not used by this function
 * @param functionId                      bound to `function_id`
 * @param indexWithinFunction             bound to `index_within_function`
 * @param normalizedUnparsedInstructions  text that is hashed with MD5
 * @param firstInsn                       instructions of the window; elements 0..windowSize-1 are read
 * @param filename                        not used by this function
 * @param windowSize                      number of instructions in the window; must be non-zero because
 *                                        firstInsn[windowSize-1] is dereferenced
 * @param stride                          not used by this function
 *
 * Side effect: increments the global counter numVectorsGenerated.
 */
void addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec,
                         const std::string& functionName, size_t functionId, size_t indexWithinFunction,
                         const std::string& normalizedUnparsedInstructions, SgAsmx86Instruction* firstInsn[],
                         const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    vector<uint8_t> packedCounts = compressVector(vec.getBase(), SignatureVector::Size);

    // Total of every counter in the vector.
    size_t totalCount = 0;
    size_t slot = 0;
    while (slot < SignatureVector::Size) {
        totalCount += vec[slot];
        ++slot;
    }

    // Address ranges occupied by the instructions of the window.
    ExtentMap coveredBytes;
    size_t insnIdx = 0;
    while (insnIdx < windowSize) {
        SgAsmx86Instruction* current = firstInsn[insnIdx];
        coveredBytes.insert(Extent(current->get_address(), current->get_size()));
        ++insnIdx;
    }

    // Digest of the normalized instruction text.
    unsigned char digest[16];
    MD5(reinterpret_cast<const unsigned char*>(normalizedUnparsedInstructions.data()),
        normalizedUnparsedInstructions.size(), digest);

    // Placeholder positions:               0   1            2                      3     4             5
    SqlDatabase::StatementPtr insertStmt = tx->statement("insert into vectors"
                                                         " (id, function_id, index_within_function, line, last_insn_va, size,"
    //                                                    6              7           8
                                                         "sum_of_counts, counts_b64, instr_seq_b64)"
                                                         " values (?,?,?,?,?,?,?,?,?)");

    // Row ids are 1-origin.
    int nextVectorId = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int();

    insertStmt->bind(0, nextVectorId);
    insertStmt->bind(1, functionId);
    insertStmt->bind(2, indexWithinFunction);
    insertStmt->bind(3, firstInsn[0]->get_address());
    insertStmt->bind(4, firstInsn[windowSize-1]->get_address());
    insertStmt->bind(5, coveredBytes.size());
    insertStmt->bind(6, totalCount);
    insertStmt->bind(7, StringUtility::encode_base64(&packedCounts[0], packedCounts.size()));
    insertStmt->bind(8, StringUtility::encode_base64(digest, 16));
    insertStmt->execute();
}
// Ignores function boundaries bool createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId, size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx) { bool retVal = false; vector<SgAsmx86Instruction*> insns; FindInstructionsVisitor vis; AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns )); size_t insnCount = insns.size(); for (size_t windowStart = 0; windowStart + windowSize <= insnCount; windowStart += stride) { static SignatureVector vec; vec.clear(); hash_map<SgAsmExpression*, size_t> valueNumbers[3]; numberOperands(&insns[windowStart], windowSize, valueNumbers); string normalizedUnparsedInstructions; // Unparse the normalized forms of the instructions for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) { SgAsmx86Instruction* insn = insns[windowStart + insnNumber]; size_t var = getInstructionKind(insn); #ifdef NORMALIZED_UNPARSED_INSTRUCTIONS string mne = insn->get_mnemonic(); boost::to_lower(mne); normalizedUnparsedInstructions += mne; #endif const SgAsmExpressionPtrList& operands = getOperands(insn); size_t operandCount = operands.size(); // Add to total for this variant ++vec.totalForVariant(var); // Add to total for each kind of operand for (size_t i = 0; i < operandCount; ++i) { SgAsmExpression* operand = operands[i]; ExpressionCategory cat = getCategory(operand); ++vec.opsForVariant(cat, var); // Add to total for this unique operand number (for this window) hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand); assert (numIter != valueNumbers[(int)cat].end()); size_t num = numIter->second; ++vec.specificOp(cat, num); // Add to total for this kind of operand ++vec.operandTotal(cat); #ifdef NORMALIZED_UNPARSED_INSTRUCTIONS normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? 
"M" : "V") + boost::lexical_cast<string>(num); #endif } // Add to total for this pair of operand kinds if (operandCount >= 2) { ExpressionCategory cat1 = getCategory(operands[0]); ExpressionCategory cat2 = getCategory(operands[1]); ++vec.operandPair(cat1, cat2); } #ifdef NORMALIZED_UNPARSED_INSTRUCTIONS if (insnNumber + 1 < windowSize) { normalizedUnparsedInstructions += ";"; } #endif } // Add vector to database addVectorToDatabase(tx, vec, functionName, functionId, windowStart/stride, normalizedUnparsedInstructions, &insns[windowStart], filename, windowSize, stride); retVal = true; } addFunctionStatistics(tx, filename, functionName, functionId, insnCount); return retVal; }
bool createVectorsForAllInstructions(SgNode* top, const std::string& filename, const std::string& functionName, int functionId, size_t windowSize, size_t stride, sqlite3_connection& con) { // Ignores function boundaries bool retVal = false; vector<SgAsmx86Instruction*> insns; FindInstructionsVisitor vis; AstQueryNamespace::querySubTree(top, std::bind2nd( vis, &insns )); std::cout << "Number of instructions: " << insns.size() << std::endl; size_t insnCount = insns.size(); for (size_t windowStart = 0; windowStart + windowSize <= insnCount; windowStart += stride) { static SignatureVector vec; vec.clear(); hash_map<SgAsmExpression*, size_t> valueNumbers[3]; numberOperands(&insns[windowStart], windowSize, valueNumbers); string normalizedUnparsedInstructions; // Unparse the normalized forms of the instructions for (size_t insnNumber = 0; insnNumber < windowSize; ++insnNumber) { SgAsmx86Instruction* insn = insns[windowStart + insnNumber]; size_t var = getInstructionKind(insn); #ifdef NORMALIZED_UNPARSED_INSTRUCTIONS string mne = insn->get_mnemonic(); boost::to_lower(mne); normalizedUnparsedInstructions += mne; #endif const SgAsmExpressionPtrList& operands = getOperands(insn); size_t operandCount = operands.size(); // Add to total for this variant ++vec.totalForVariant(var); // Add to total for each kind of operand for (size_t i = 0; i < operandCount; ++i) { SgAsmExpression* operand = operands[i]; ExpressionCategory cat = getCategory(operand); ++vec.opsForVariant(cat, var); // Add to total for this unique operand number (for this window) hash_map<SgAsmExpression*, size_t>::const_iterator numIter = valueNumbers[(int)cat].find(operand); assert (numIter != valueNumbers[(int)cat].end()); size_t num = numIter->second; ++vec.specificOp(cat, num); // Add to total for this kind of operand ++vec.operandTotal(cat); #ifdef NORMALIZED_UNPARSED_INSTRUCTIONS normalizedUnparsedInstructions += (cat == ec_reg ? "R" : cat == ec_mem ? 
"M" : "V") + boost::lexical_cast<string>(num); #endif } //Try to see what the effect is of jumps on the false positive rate //uint64_t addr =0; /* if( x86GetKnownBranchTarget(insn, addr) == true ) { uint64_t insn_addr = insn->get_address(); if( addr < insn_addr ) normalizedUnparsedInstructions += " UP "; else normalizedUnparsedInstructions += " DOWN "; }*/ // Add to total for this pair of operand kinds if (operandCount >= 2) { ExpressionCategory cat1 = getCategory(operands[0]); ExpressionCategory cat2 = getCategory(operands[1]); ++vec.operandPair(cat1, cat2); } #ifdef NORMALIZED_UNPARSED_INSTRUCTIONS if (insnNumber + 1 < windowSize) { normalizedUnparsedInstructions += ";"; } #endif } #if 0 // Print out this vector cout << "{"; for (size_t i = 0; i < SignatureVector::Size; ++i) { if (i != 0) cout << ", "; cout << vec[i]; } cout << "}\n"; #endif // cout << "Normalized instruction stream: " << normalizedUnparsedInstructions << endl; // Add vector to database addVectorToDatabase(con, vec, functionName, functionId, windowStart/stride, normalizedUnparsedInstructions, &insns[windowStart], filename, windowSize, stride); retVal = true; } addFunctionStatistics(con, filename, functionName, functionId, insnCount); return retVal; }