int main(int argc, char *argv[]) { measurement_p l,g; test_p tst = (test_p)malloc(sizeof(test_t)); /* initialize communication and get options */ comm_initialize(tst, &argc, &argv); ROOTONLY printf("%s\n", COPYRIGHT); general_options(tst,argc,argv); /* create measurement structs */ l = measurement_create(tst, "local"); g = measurement_create(tst, "global"); ROOTONLY mkdir(tst->case_name, 0755); ROOTONLY printf("Confidence: testing...\n"); measurement_collect(tst, l); ROOTONLY printf("Confidence: local analysis...\n"); measurement_analyze(tst, l, -1.0); ROOTONLY printf("Confidence: remote analysis\n"); comm_aggregate(g, l); measurement_analyze(tst, g, -1.0); ROOTONLY printf("Confidence: saving results\n"); measurement_serialize(tst, g, root_rank); /* free measurement and test structs */ l = measurement_destroy(l); g = measurement_destroy(g); if (tst->argv != NULL) free(tst->argv); if (tst->tsdump != NULL) free(tst->tsdump); free(tst); comm_finalize(); return 0; }
int main(int argc, char* argv[]) { std::string server; std::string port; std::string user; std::string folder; std::string password; fs::path from; fs::path to; bool inbox; //[Gmail]/Sent Mail po::options_description general_options("General"); general_options.add_options() ("help", "list options"); po::options_description file_options("Load"); file_options.add_options() ("save-raw", po::value<fs::path>(&to), "path to save the data (after download phase)"); po::options_description source_options("Download"); source_options.add_options() ("load", po::value<fs::path>(&from), "mail folder"); po::options_description run_options("Run"); po::options_description all_options("Email Topology Options"); all_options .add(general_options) .add(file_options) .add(source_options); if(argc < 2) { std::cout << all_options << std::endl; return 1; } po::variables_map vm; try { int options_style = po::command_line_style::default_style; po::store(po::parse_command_line(argc, argv, all_options, options_style), vm); po::notify(vm); } catch(std::exception& e) { std::cout << all_options << std::endl; std::cout << "Command line parsing failed: " << e.what() << std::endl; return 1; } if(vm.count("help")) { std::cout << all_options << std::endl; return 1; } email_id_bimap email_id; connectedness_graph cg; entity_map em; initial_group_partition_map igpm; message_id_set message_id; if(!vm.count("save-raw")) { std::cout << "you must specify --save-raw with a file name" << std::endl; return 1; } if(!vm.count("load") || !fs::exists(from) || !fs::is_directory(from)) { std::cout << "missing source data folder (or not a folder)" << std::endl; return 1; } std::vector<char> buffer(128 * 1024); std::string headers; std::cout << "loading " << from; for(fs::recursive_directory_iterator fe, fi(from); fi != fe; ++fi) { if(fs::is_directory(*fi)) continue; fs::ifstream in(*fi); headers.clear(); while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); if(line.empty()) break; headers += "\n"; headers += line; } members_t g; for(boost::sregex_iterator i(headers.begin(), headers.end(), re_terms), e; i != e; ++i) { const boost::smatch& what = *i; std::string field = what[1].str(); if(boost::algorithm::iequals(field, "Message-ID")) { std::string id = what[2].str(); boost::algorithm::trim(id); std::pair<message_id_set::iterator, bool> result = message_id.insert(id); //skip this duplicate message if(!result.second) { g.clear(); break; } } else if(boost::algorithm::iequals(field, "From") || boost::algorithm::iequals(field, "To") || boost::algorithm::iequals(field, "Cc")) { std::string data = what[2].str(); boost::replace_all(data, "\n", ""); boost::replace_all(data, "\r", ""); for(boost::sregex_iterator j(data.begin(), data.end(), re_email), e; j != e; ++j) { std::string name = (*j)[1].str(); if(name.empty()) name = (*j)[2].str(); std::string email_address = (*j)[3].str(); boost::algorithm::to_lower(email_address); boost::algorithm::trim(name); std::pair<email_id_bimap::map_by<email>::iterator, bool> result = email_id.by<email>().insert( email_id_bimap::map_by<email>::value_type(email_address, email_id.size())); if(result.second) std::cout << "@" << std::flush; if(!name.empty() && boost::to_lower_copy(name) != email_address) em[email_address].insert(name); g.insert(result.first->second); } } } if(g.empty()) { continue; } initial_group_partition_map::iterator r = igpm.find(g); if(r == igpm.end()) { connectedness_graph::vertex_descriptor node = gr::add_vertex(cg); cg[node].members = g; cg[node].weight = 1; igpm.insert(r, std::make_pair(g, node)); } else { connectedness_graph::vertex_descriptor node = r->second; cg[node].weight++; } std::cout << ". " << std::flush; } std::cout << std::endl; if(fs::exists(to)) fs::remove(to); fs::ofstream out(to); std::cout << "saving data to " << to.file_string(); for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { out << (unsigned long long)cg[*i].weight; for(members_t::iterator j = cg[*i].members.begin(); j != cg[*i].members.end(); ++j) { out << "\t" << email_id.by<bit>().equal_range(*j).first->second; } out << std::endl; } out << "-" << std::endl; for(entity_map::iterator i = em.begin(); i != em.end(); ++i) { out << i->first; for(std::set<std::string>::iterator k = i->second.begin(); k != i->second.end(); ++k) { out << "\t" << *k; } out << std::endl; } return 0; }
Status addMongosOptions(moe::OptionSection* options) { moe::OptionSection general_options("General options"); Status ret = addGeneralServerOptions(&general_options); if (!ret.isOK()) { return ret; } #if defined(_WIN32) moe::OptionSection windows_scm_options("Windows Service Control Manager options"); ret = addWindowsServerOptions(&windows_scm_options); if (!ret.isOK()) { return ret; } #endif #ifdef MONGO_SSL moe::OptionSection ssl_options("SSL options"); ret = addSSLServerOptions(&ssl_options); if (!ret.isOK()) { return ret; } #endif moe::OptionSection sharding_options("Sharding options"); sharding_options.addOptionChaining("configdb", "configdb", moe::String, "1 or 3 comma separated config servers"); sharding_options.addOptionChaining("localThreshold", "localThreshold", moe::Int, "ping time (in ms) for a node to be considered local (default 15ms)"); sharding_options.addOptionChaining("test", "test", moe::Switch, "just run unit tests"); sharding_options.addOptionChaining("upgrade", "upgrade", moe::Switch, "upgrade meta data version"); sharding_options.addOptionChaining("chunkSize", "chunkSize", moe::Int, "maximum amount of data per chunk"); sharding_options.addOptionChaining("ipv6", "ipv6", moe::Switch, "enable IPv6 support (disabled by default)"); sharding_options.addOptionChaining("jsonp", "jsonp", moe::Switch, "allow JSONP access via http (has security implications)"); sharding_options.addOptionChaining("noscripting", "noscripting", moe::Switch, "disable scripting engine"); options->addSection(general_options); #if defined(_WIN32) options->addSection(windows_scm_options); #endif options->addSection(sharding_options); #ifdef MONGO_SSL options->addSection(ssl_options); #endif options->addOptionChaining("noAutoSplit", "noAutoSplit", moe::Switch, "do not send split commands with writes") .hidden(); return Status::OK(); }
int main(int argc, char* argv[]) { std::string server; std::string port; std::string user; std::string folder; std::string password; std::vector<fs::path> from; std::vector<fs::path> entity; fs::path to; //[Gmail]/Sent Mail po::options_description general_options("General"); general_options.add_options() ("help", "list options"); po::options_description file_options("Load"); file_options.add_options() ("save-raw", po::value<fs::path>(&to), "path to save the data (after download phase)"); po::options_description download_options("Download"); download_options.add_options() ("server", po::value<std::string>(&server), "imap server dns/ip") ("port", po::value<std::string>(&port)->default_value("993"), "imap port") ("folder", po::value<std::string>(&folder)->default_value("Sent"), "imap folder") ("user", po::value<std::string>(&user), "imap username") ("password", po::value<std::string>(&password), "imap password (will ask if not specified)"); po::options_description run_options("Run"); po::options_description all_options("Email Topology Options"); all_options .add(general_options) .add(file_options) .add(download_options); if(argc < 2) { std::cout << all_options << std::endl; return 1; } po::variables_map vm; try { int options_style = po::command_line_style::default_style; po::store(po::parse_command_line(argc, argv, all_options, options_style), vm); po::notify(vm); } catch(std::exception& e) { std::cout << all_options << std::endl; std::cout << "Command line parsing failed: " << e.what() << std::endl; return 1; } if(vm.count("help")) { std::cout << all_options << std::endl; return 1; } email_id_bimap email_id; connectedness_graph cg; entity_map em; initial_group_partition_map igpm; if(!vm.count("save-raw")) { std::cout << "you must specify --save-raw with a file name" << std::endl; return 1; } if(!vm.count("password")) { password = getpass("Password: "******"missing server for download" << std::endl; return 1; } if(user.empty()) { std::cout << "missing user for download" << std::endl; return 1; } if(password.empty()) { std::cout << "missing user for download" << std::endl; return 1; } //this is our network block, downloads all messages headers try { std::cout << "downloading " << folder << " from " << server << std::endl; //use to dedupe if there are dupes message_id_set message_id; typedef boost::function<void (const std::string&, const std::list<std::string>& args)> untagged_handler; std::string pending_tag = "* "; std::list<std::string> pending_command; pending_command.push_back("WAIT_FOR_ACK"); untagged_handler pending_handler; unsigned int command_id = 0; //The sequence of imap commands we want to run std::list<std::list<std::string> > commands; std::list<untagged_handler> handlers; handlers.push_back(log_handler()); commands.push_back(std::list<std::string>()); std::ostringstream login_os; login_os << "LOGIN \"" << user << "\" {" << password.size() << "}"; commands.back().push_back(login_os.str()); commands.back().push_back(password); handlers.push_back(log_handler()); commands.push_back(std::list<std::string>()); commands.back().push_back("LIST \"\" *"); handlers.push_back(log_handler()); commands.push_back(std::list<std::string>()); commands.back().push_back("SELECT \"" + folder + "\""); handlers.push_back(header_handler(email_id, cg, em, message_id, igpm)); commands.push_back(std::list<std::string>()); commands.back().push_back("FETCH 1:* (BODY.PEEK[HEADER.FIELDS (MESSAGE-ID FROM TO CC)])"); commands.push_back(std::list<std::string>()); handlers.push_back(log_handler()); commands.back().push_back("LOGOUT"); //open ssl connection to the server, no cert checking asio::io_service io_service; asio::ip::tcp::resolver resolver(io_service); asio::ip::tcp::resolver::query query(server, port); asio::ip::tcp::resolver::iterator iterator = resolver.resolve(query); asio::ssl::context context(io_service, asio::ssl::context::sslv23); context.set_verify_mode(asio::ssl::context::verify_none); asio::ssl::stream<asio::ip::tcp::socket> socket(io_service, context); socket.lowest_layer().connect(*iterator); socket.handshake(asio::ssl::stream_base::client); asio::streambuf buf; while(true) { //read the next line of data std::size_t line_length = asio::read_until(socket, buf, re_crlf); std::string line( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + line_length); buf.consume(line_length); boost::match_results<std::string::iterator> what; std::size_t initial = 0; std::list<std::string> args; //the line may be split into segments with chunks of data embedded, this is the case //for bodies or message header blocks that are returned, we only handle this case if it //comes in untagged response (*) not a continuation (+), i think that is normal while(regex_search(line.begin() + initial, line.end(), what, re_byte_buffer, boost::match_default)) { unsigned int bytes = boost::lexical_cast<unsigned int>(what[1].str()); if(buf.size() < bytes) asio::read(socket, buf, asio::transfer_at_least(bytes - buf.size())); args.push_back( std::string( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + bytes)); buf.consume(bytes); line.resize(what[1].second - line.begin()); initial = line.size(); //read the next line of data line_length = asio::read_until(socket, buf, re_crlf); line += std::string( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + line_length); buf.consume(line_length); } if(boost::algorithm::starts_with(line, pending_tag)) { //if the command is being completed, then we will go here, bail out if the response wasn't ok if(!boost::algorithm::starts_with(line, pending_tag + "OK")) { std::cout << line; throw std::runtime_error("command failed"); } //pull the next command off the list pending_tag = "A" + boost::lexical_cast<std::string>(command_id++) + " "; if(commands.size() == 0) break; pending_handler = handlers.front(); pending_command = commands.front(); commands.pop_front(); handlers.pop_front(); //send the command along with any data arguments std::cout << pending_tag << pending_command.front() << std::endl; asio::write(socket, asio::buffer(pending_tag.data(), pending_tag.size())); for(std::list<std::string>::iterator i = pending_command.begin(); i != pending_command.end(); ++i) { if(i != pending_command.begin()) { //print the continuation response std::size_t line_length = asio::read_until(socket, buf, re_crlf); std::string line( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + line_length); buf.consume(line_length); std::cout << line << std::flush; if(!boost::algorithm::starts_with(line, "+ ")) { throw std::runtime_error("bad response when writing extra data"); } } else { //print it out as well (but not the args) std::cout << *i << std::endl; } asio::write(socket, asio::buffer(i->data(), i->size())); asio::write(socket, asio::buffer("\r\n", 2)); } } else if(boost::algorithm::starts_with(line, "* ")) { //if there is a registered handler, dispatch to it if(pending_handler) pending_handler(line, args); } else { throw std::runtime_error("unrecognized response"); } } } catch (std::exception& e) { std::cout << "Exception: " << e.what() << std::endl; return 1; } std::cout << std::endl; if(to.empty()) { std::cout << "Missing output file for save" << std::endl; return 1; } if(fs::exists(to)) fs::remove(to); fs::ofstream out(to); std::cout << "saving data to " << to.file_string(); for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { out << (unsigned long long)cg[*i].weight; for(members_t::iterator j = cg[*i].members.begin(); j != cg[*i].members.end(); ++j) { out << "\t" << email_id.by<bit>().equal_range(*j).first->second; } out << std::endl; } out << "-" << std::endl; for(entity_map::iterator i = em.begin(); i != em.end(); ++i) { out << i->first; for(std::set<std::string>::iterator k = i->second.begin(); k != i->second.end(); ++k) { out << "\t" << *k; } out << std::endl; } return 0; }
Status addMongosOptions(moe::OptionSection* options) { moe::OptionSection general_options("General options"); Status ret = addGeneralServerOptions(&general_options); if (!ret.isOK()) { return ret; } #if defined(_WIN32) moe::OptionSection windows_scm_options("Windows Service Control Manager options"); ret = addWindowsServerOptions(&windows_scm_options); if (!ret.isOK()) { return ret; } #endif #ifdef MONGO_CONFIG_SSL moe::OptionSection ssl_options("SSL options"); ret = addSSLServerOptions(&ssl_options); if (!ret.isOK()) { return ret; } #endif moe::OptionSection sharding_options("Sharding options"); sharding_options.addOptionChaining("sharding.configDB", "configdb", moe::String, "Connection string for communicating with config servers:\n" "<config replset name>/<host1:port>,<host2:port>,[...]"); sharding_options.addOptionChaining( "replication.localPingThresholdMs", "localThreshold", moe::Int, "ping time (in ms) for a node to be considered local (default 15ms)"); sharding_options.addOptionChaining("test", "test", moe::Switch, "just run unit tests") .setSources(moe::SourceAllLegacy); sharding_options.addOptionChaining( "sharding.chunkSize", "chunkSize", moe::Int, "maximum amount of data per chunk"); sharding_options.addOptionChaining("net.http.JSONPEnabled", "jsonp", moe::Switch, "allow JSONP access via http (has security implications)") .setSources(moe::SourceAllLegacy); sharding_options.addOptionChaining( "noscripting", "noscripting", moe::Switch, "disable scripting engine") .setSources(moe::SourceAllLegacy); options->addSection(general_options); #if defined(_WIN32) options->addSection(windows_scm_options); #endif options->addSection(sharding_options); #ifdef MONGO_CONFIG_SSL options->addSection(ssl_options); #endif options->addOptionChaining("noAutoSplit", "noAutoSplit", moe::Switch, "do not send split commands with writes") .hidden() .setSources(moe::SourceAllLegacy); options->addOptionChaining( "sharding.autoSplit", "", moe::Bool, "send split commands with writes") .setSources(moe::SourceYAMLConfig); return Status::OK(); }
Status addMongodOptions(moe::OptionSection* options) { moe::OptionSection general_options("General options"); Status ret = addGeneralServerOptions(&general_options); if (!ret.isOK()) { return ret; } #if defined(_WIN32) moe::OptionSection windows_scm_options("Windows Service Control Manager options"); ret = addWindowsServerOptions(&windows_scm_options); if (!ret.isOK()) { return ret; } #endif #ifdef MONGO_SSL moe::OptionSection ssl_options("SSL options"); ret = addSSLServerOptions(&ssl_options); if (!ret.isOK()) { return ret; } #endif moe::OptionSection ms_options("Master/slave options (old; use replica sets instead)"); moe::OptionSection rs_options("Replica set options"); moe::OptionSection replication_options("Replication options"); moe::OptionSection sharding_options("Sharding options"); general_options.addOptionChaining("auth", "auth", moe::Switch, "run with security"); general_options.addOptionChaining("cpu", "cpu", moe::Switch, "periodically show cpu and iowait utilization"); #ifdef _WIN32 general_options.addOptionChaining("dbpath", "dbpath", moe::String, "directory for datafiles - defaults to \\data\\db\\") .setDefault(moe::Value(std::string("\\data\\db\\"))); #else general_options.addOptionChaining("dbpath", "dbpath", moe::String, "directory for datafiles - defaults to /data/db/") .setDefault(moe::Value(std::string("/data/db"))); #endif general_options.addOptionChaining("diaglog", "diaglog", moe::Int, "0=off 1=W 2=R 3=both 7=W+some reads"); general_options.addOptionChaining("directoryperdb", "directoryperdb", moe::Switch, "each database will be stored in a separate directory"); general_options.addOptionChaining("ipv6", "ipv6", moe::Switch, "enable IPv6 support (disabled by default)"); general_options.addOptionChaining("journal", "journal", moe::Switch, "enable journaling"); general_options.addOptionChaining("journalCommitInterval", "journalCommitInterval", moe::Unsigned, "how often to group/batch commit (ms)"); general_options.addOptionChaining("journalOptions", "journalOptions", moe::Int, "journal diagnostic options"); general_options.addOptionChaining("jsonp", "jsonp", moe::Switch, "allow JSONP access via http (has security implications)"); general_options.addOptionChaining("noauth", "noauth", moe::Switch, "run without security"); general_options.addOptionChaining("noIndexBuildRetry", "noIndexBuildRetry", moe::Switch, "don't retry any index builds that were interrupted by shutdown"); general_options.addOptionChaining("nojournal", "nojournal", moe::Switch, "disable journaling (journaling is on by default for 64 bit)"); general_options.addOptionChaining("noprealloc", "noprealloc", moe::Switch, "disable data file preallocation - will often hurt performance"); general_options.addOptionChaining("noscripting", "noscripting", moe::Switch, "disable scripting engine"); general_options.addOptionChaining("notablescan", "notablescan", moe::Switch, "do not allow table scans"); general_options.addOptionChaining("nssize", "nssize", moe::Int, ".ns file size (in MB) for new databases") .setDefault(moe::Value(16)); general_options.addOptionChaining("profile", "profile", moe::Int, "0=off 1=slow, 2=all"); general_options.addOptionChaining("quota", "quota", moe::Switch, "limits each database to a certain number of files (8 default)"); general_options.addOptionChaining("quotaFiles", "quotaFiles", moe::Int, "number of files allowed per db, requires --quota"); general_options.addOptionChaining("repair", "repair", moe::Switch, "run repair on all dbs"); general_options.addOptionChaining("repairpath", "repairpath", moe::String, "root directory for repair files - defaults to dbpath"); general_options.addOptionChaining("rest", "rest", moe::Switch, "turn on simple rest api"); #if defined(__linux__) general_options.addOptionChaining("shutdown", "shutdown", moe::Switch, "kill a running server (for init scripts)"); #endif general_options.addOptionChaining("slowms", "slowms", moe::Int, "value of slow for profile and console log") .setDefault(moe::Value(100)); general_options.addOptionChaining("smallfiles", "smallfiles", moe::Switch, "use a smaller default file size"); general_options.addOptionChaining("syncdelay", "syncdelay", moe::Double, "seconds between disk syncs (0=never, but not recommended)") .setDefault(moe::Value(60.0)); general_options.addOptionChaining("sysinfo", "sysinfo", moe::Switch, "print some diagnostic system information"); general_options.addOptionChaining("upgrade", "upgrade", moe::Switch, "upgrade db if needed"); replication_options.addOptionChaining("oplogSize", "oplogSize", moe::Int, "size to use (in MB) for replication op log. default is 5% of disk space " "(i.e. large is good)"); ms_options.addOptionChaining("master", "master", moe::Switch, "master mode"); ms_options.addOptionChaining("slave", "slave", moe::Switch, "slave mode"); ms_options.addOptionChaining("source", "source", moe::String, "when slave: specify master as <server:port>"); ms_options.addOptionChaining("only", "only", moe::String, "when slave: specify a single database to replicate"); ms_options.addOptionChaining("slavedelay", "slavedelay", moe::Int, "specify delay (in seconds) to be used when applying master ops to slave"); ms_options.addOptionChaining("autoresync", "autoresync", moe::Switch, "automatically resync if slave data is stale"); rs_options.addOptionChaining("replSet", "replSet", moe::String, "arg is <setname>[/<optionalseedhostlist>]"); rs_options.addOptionChaining("replIndexPrefetch", "replIndexPrefetch", moe::String, "specify index prefetching behavior (if secondary) [none|_id_only|all]"); sharding_options.addOptionChaining("configsvr", "configsvr", moe::Switch, "declare this is a config db of a cluster; default port 27019; " "default dir /data/configdb"); sharding_options.addOptionChaining("shardsvr", "shardsvr", moe::Switch, "declare this is a shard db of a cluster; default port 27018"); sharding_options.addOptionChaining("noMoveParanoia", "noMoveParanoia", moe::Switch, "turn off paranoid saving of data for the moveChunk command; default") .hidden(); sharding_options.addOptionChaining("moveParanoia", "moveParanoia", moe::Switch, "turn on paranoid saving of data during the moveChunk command " "(used for internal system diagnostics)") .hidden(); options->addSection(general_options); #if defined(_WIN32) options->addSection(windows_scm_options); #endif options->addSection(replication_options); options->addSection(ms_options); options->addSection(rs_options); options->addSection(sharding_options); #ifdef MONGO_SSL options->addSection(ssl_options); #endif options->addOptionChaining("fastsync", "fastsync", moe::Switch, "indicate that this instance is starting from a dbpath snapshot of the repl peer") .hidden(); options->addOptionChaining("pretouch", "pretouch", moe::Int, "n pretouch threads for applying master/slave operations") .hidden(); // This is a deprecated option that we are supporting for backwards compatibility // The first value for this option can be either 'dbpath' or 'run'. // If it is 'dbpath', mongod prints the dbpath and exits. Any extra values are ignored. // If it is 'run', mongod runs normally. Providing extra values is an error. options->addOptionChaining("command", "command", moe::StringVector, "command") .hidden() .positional(1, 3); options->addOptionChaining("cacheSize", "cacheSize", moe::Long, "cache size (in MB) for rec store") .hidden(); options->addOptionChaining("nodur", "nodur", moe::Switch, "disable journaling") .hidden(); // things we don't want people to use options->addOptionChaining("nohints", "nohints", moe::Switch, "ignore query hints") .hidden(); options->addOptionChaining("nopreallocj", "nopreallocj", moe::Switch, "don't preallocate journal files") .hidden(); options->addOptionChaining("dur", "dur", moe::Switch, "enable journaling") .hidden(); options->addOptionChaining("durOptions", "durOptions", moe::Int, "durability diagnostic options") .hidden(); // deprecated pairing command line options options->addOptionChaining("pairwith", "pairwith", moe::Switch, "DEPRECATED") .hidden(); options->addOptionChaining("arbiter", "arbiter", moe::Switch, "DEPRECATED") .hidden(); options->addOptionChaining("opIdMem", "opIdMem", moe::Switch, "DEPRECATED") .hidden(); return Status::OK(); }
Status addMongosOptions(moe::OptionSection* options) { moe::OptionSection general_options("General options"); Status ret = addGeneralServerOptions(&general_options); if (!ret.isOK()) { return ret; } #if defined(_WIN32) moe::OptionSection windows_scm_options("Windows Service Control Manager options"); ret = addWindowsServerOptions(&windows_scm_options); if (!ret.isOK()) { return ret; } #endif #ifdef MONGO_CONFIG_SSL moe::OptionSection ssl_options("SSL options"); ret = addSSLServerOptions(&ssl_options); if (!ret.isOK()) { return ret; } #endif moe::OptionSection sharding_options("Sharding options"); sharding_options.addOptionChaining("sharding.configDB", "configdb", moe::String, "Connection string for communicating with config servers:\n" "<config replset name>/<host1:port>,<host2:port>,[...]"); sharding_options.addOptionChaining( "replication.localPingThresholdMs", "localThreshold", moe::Int, "ping time (in ms) for a node to be considered local (default 15ms)"); sharding_options.addOptionChaining("test", "test", moe::Switch, "just run unit tests") .setSources(moe::SourceAllLegacy); /** Javascript Options * As a general rule, js enable/disable options are ignored for mongos. * However, we define and hide these options so that if someone * were to use these args in a set of options meant for both * mongos and mongod runs, the mongos won't fail on an unknown argument. * * These options have no affect on how the mongos runs. * Setting either or both to *any* value will provoke a warning message * and nothing more. */ sharding_options .addOptionChaining("noscripting", "noscripting", moe::Switch, "disable scripting engine") .hidden() .setSources(moe::SourceAllLegacy); general_options .addOptionChaining( "security.javascriptEnabled", "", moe::Bool, "Enable javascript execution") .hidden() .setSources(moe::SourceYAMLConfig); options->addSection(general_options).transitional_ignore(); #if defined(_WIN32) options->addSection(windows_scm_options).transitional_ignore(); #endif options->addSection(sharding_options).transitional_ignore(); #ifdef MONGO_CONFIG_SSL options->addSection(ssl_options).transitional_ignore(); #endif return Status::OK(); }
int main(int argc, char* argv[]) { std::vector<fs::path> from; std::vector<fs::path> entity; std::string ignore_string, save_base; unsigned int threshold; unsigned int person_threshold; bool no_individuals; bool remove_most_common; std::vector<unsigned int> save_at_v; std::set<unsigned int> save_at; //[Gmail]/Sent Mail po::options_description general_options("General"); general_options.add_options() ("help", "list options"); po::options_description file_options("Load"); file_options.add_options() ("ignore", po::value<std::string>(&ignore_string)->default_value("@lists\\.|@googlegroups\\.|@yahoogroups\\.|@mailman\\.|@facebookmail\\.|noreply|do[-_]not[-_]reply|^buzz\\+"), "ignore messages with a recipient matching this expression") ("entity-raw", po::value<std::vector<fs::path> >(&entity), "paths to load data ONLY for entities") ("load-raw", po::value<std::vector<fs::path> >(&from), "paths to load data from"); po::options_description run_options("Export Options"); run_options.add_options() ("save", po::value<std::string>(&save_base), "base path to save the data at") ("remove-most-common", po::value<bool>(&remove_most_common)->default_value(1), "remove the most common individual (owner)") ("no-individuals", po::value<bool>(&no_individuals)->default_value(0), "ignore individuals") ("threshold", po::value<unsigned int>(&threshold)->default_value(1), "minimum mails for group") ("person-threshold", po::value<unsigned int>(&person_threshold)->default_value(2), "minimum mails for person"); po::options_description all_options("Email Topology Options"); all_options .add(general_options) .add(file_options) .add(run_options); if(argc < 2) { std::cout << all_options << std::endl; return 1; } po::variables_map vm; try { int options_style = po::command_line_style::default_style; po::store(po::parse_command_line(argc, argv, all_options, options_style), vm); po::notify(vm); } catch(std::exception& e) { std::cout << all_options << std::endl; std::cout << "Command line parsing failed: " << e.what() << std::endl; return 1; } if(vm.count("help")) { std::cout << all_options << std::endl; return 1; } std::copy(save_at_v.begin(), save_at_v.end(), std::inserter(save_at, save_at.end())); email_id_bimap email_id; connectedness_graph cg; initial_group_partition_map igpm; entity_map em; if(!vm.count("load-raw")) { std::cout << "must load something" << std::endl; return 1; } if(!vm.count("save")) { std::cout << "must save something" << std::endl; return 1; } std::size_t max_id = 0; std::vector<char> buffer(128 * 1024); try { boost::regex re_ignore(ignore_string); boost::regex re_loader("([^\t]+)"); std::cout << "resolving entities" << std::endl; for(std::vector<fs::path>::iterator i = entity.begin(); i != entity.end(); ++i) { if(!fs::exists(*i)) throw std::runtime_error(std::string("input file not found: ") + i->file_string()); std::cout << "loading " << i->file_string(); fs::ifstream in(*i); //we don't care about messages here while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); if(line == "-") { break; } bool first = true; for(boost::sregex_iterator j(line.begin(), line.end(), re_loader), e; j != e; ++j) { if(first) { first = false; } else { std::string email_address = (*j)[0].str(); if(regex_search(email_address, re_ignore)) { continue; } std::pair<email_id_bimap::map_by<email>::iterator, bool> result = email_id.by<email>().insert( email_id_bimap::map_by<email>::value_type(email_address, email_id.size())); if(result.second) std::cout << "@" << std::flush; } } } while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); std::string email_address; bool first = true; for(boost::sregex_iterator j(line.begin(), line.end(), re_loader), e; j != e; ++j) { if(first) { first = false; email_address = (*j)[0].str(); if(regex_search(email_address, re_ignore)) { break; } } else { std::string name = (*j)[0].str(); try { em[email_address].insert(name); } catch(std::exception& e) { std::cout << "err missing: " << email_address << std::endl; throw; } } } } std::cout << std::endl; } resolve_entities(em, email_id); for(std::vector<fs::path>::iterator i = from.begin(); i != from.end(); ++i) { if(!fs::exists(*i)) throw std::runtime_error(std::string("input file not found: ") + i->file_string()); std::cout << "loading " << i->file_string(); fs::ifstream in(*i); while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); if(line == "-") { break; } members_t g; unsigned int count = 0; bool first = true; for(boost::sregex_iterator j(line.begin(), line.end(), re_loader), e; j != e; ++j) { if(first) { first = false; std::string number = (*j)[0].str(); count = boost::lexical_cast<unsigned int>(number); } else { std::string email_address = (*j)[0].str(); if(regex_search(email_address, re_ignore)) { g.clear(); continue; } std::pair<email_id_bimap::map_by<email>::iterator, bool> result = email_id.by<email>().insert( email_id_bimap::map_by<email>::value_type(email_address, email_id.size())); if(result.second) std::cout << "@" << std::flush; g.insert(result.first->second); } } if(g.empty()) { //no emails? wtfs continue; } initial_group_partition_map::iterator r = igpm.find(g); if(r == igpm.end()) { connectedness_graph::vertex_descriptor node = gr::add_vertex(cg); cg[node].members = g; cg[node].weight = count; igpm.insert(r, std::make_pair(g, node)); } else { connectedness_graph::vertex_descriptor node = r->second; cg[node].weight += count; } std::cout << "." << std::flush; } //no need to load em std::cout << std::endl; } max_id = email_id.size(); } catch(std::exception& e) { std::cout << "failed to load data: " << e.what() << std::endl; return 1; } std::map<unsigned int, score_t> ppl; for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { if(cg[*i].weight >= threshold) { for(members_t::iterator j = cg[*i].members.begin(); j != cg[*i].members.end(); ++j) { ppl[*j] += cg[*i].weight; } ++i; } else { connectedness_graph::vertex_iterator to_erase = i++; gr::clear_vertex(*to_erase, cg); gr::remove_vertex(*to_erase, cg); } } //remove the owner, todo, this is evil because now there are dupe groups if A was owner and A B C and B C existed if(!ppl.empty()) { if(remove_most_common) { unsigned int max_person = ppl.begin()->first; score_t max_val = ppl.begin()->second; for(std::map<unsigned int, score_t>::iterator j = ppl.begin(); j != ppl.end(); ++j) { if(j->second > max_val) { max_val = j->second; max_person = j->first; } } for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { cg[*i].members.erase(max_person); if(cg[*i].members.empty()) { connectedness_graph::vertex_iterator to_delete = i; ++i; gr::clear_vertex(*to_delete, cg); gr::remove_vertex(*to_delete, cg); } else { ++i; } } } for(std::map<unsigned int, score_t>::iterator j = ppl.begin(); j != ppl.end();) { if(j->second >= person_threshold) { std::map<unsigned int, score_t>::iterator to_delete = j++; ppl.erase(to_delete); } else { ++j; } } for(std::map<unsigned int, score_t>::iterator j = ppl.begin(); j != ppl.end(); ++j) { for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { cg[*i].members.erase(j->first); if(cg[*i].members.empty()) { connectedness_graph::vertex_iterator to_delete = i; ++i; gr::clear_vertex(*to_delete, cg); gr::remove_vertex(*to_delete, cg); } else { ++i; } } } } if(no_individuals) { for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { if(cg[*i].members.size() > 1) { ++i; } else { connectedness_graph::vertex_iterator to_erase = i++; gr::clear_vertex(*to_erase, cg); gr::remove_vertex(*to_erase, cg); } } } //normalize group weights for large groups for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { if(cg[*i].members.size() < 20) continue; cg[*i].weight *= score_t(20) / cg[*i].members.size(); } unsigned int vertex_number = 0; for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { cg[*i].index = vertex_number++; } std::cout << "converting to person graph" << std::endl; people_graph pg; std::map<unsigned int, people_graph::vertex_descriptor> remaining_people; for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { group& g = cg[*i]; for(members_t::const_iterator j = g.members.begin(); j != g.members.end(); ++j) { //if there is a new person represented add them to the map std::pair<std::map<unsigned int, people_graph::vertex_descriptor>::iterator, bool> res = remaining_people.insert(std::make_pair(*j, people_graph::vertex_descriptor())); if(res.second) { res.first->second = gr::add_vertex(pg); person& p = pg[res.first->second]; p.id = res.first->first; p.name = email_id.by<bit>().equal_range(p.id).first->second; } } } for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { group& g = cg[*i]; for(members_t::const_iterator j = g.members.begin(); j != g.members.end(); ++j) { members_t::const_iterator k = j; for(++k; k != g.members.end(); ++k) { //duplicates eliminated by setS type container people_graph::edge_descriptor l = gr::add_edge(remaining_people[*j], remaining_people[*k], pg).first; edge& e = pg[l]; e.weight += g.weight; } } } fs::path path(save_base); if(fs::exists(path)) fs::remove(path); fs::ofstream out(path); gr::dynamic_properties dp; dp.property("label", get(&person::name, pg)); dp.property("weight", gr::get(&edge::weight, pg)); gr::write_graphml(out, pg, gr::get(&person::id, pg), dp, false); return 0; }