int main(int argc, char* argv[]) { std::string server; std::string port; std::string user; std::string folder; std::string password; fs::path from; fs::path to; bool inbox; //[Gmail]/Sent Mail po::options_description general_options("General"); general_options.add_options() ("help", "list options"); po::options_description file_options("Load"); file_options.add_options() ("save-raw", po::value<fs::path>(&to), "path to save the data (after download phase)"); po::options_description source_options("Download"); source_options.add_options() ("load", po::value<fs::path>(&from), "mail folder"); po::options_description run_options("Run"); po::options_description all_options("Email Topology Options"); all_options .add(general_options) .add(file_options) .add(source_options); if(argc < 2) { std::cout << all_options << std::endl; return 1; } po::variables_map vm; try { int options_style = po::command_line_style::default_style; po::store(po::parse_command_line(argc, argv, all_options, options_style), vm); po::notify(vm); } catch(std::exception& e) { std::cout << all_options << std::endl; std::cout << "Command line parsing failed: " << e.what() << std::endl; return 1; } if(vm.count("help")) { std::cout << all_options << std::endl; return 1; } email_id_bimap email_id; connectedness_graph cg; entity_map em; initial_group_partition_map igpm; message_id_set message_id; if(!vm.count("save-raw")) { std::cout << "you must specify --save-raw with a file name" << std::endl; return 1; } if(!vm.count("load") || !fs::exists(from) || !fs::is_directory(from)) { std::cout << "missing source data folder (or not a folder)" << std::endl; return 1; } std::vector<char> buffer(128 * 1024); std::string headers; std::cout << "loading " << from; for(fs::recursive_directory_iterator fe, fi(from); fi != fe; ++fi) { if(fs::is_directory(*fi)) continue; fs::ifstream in(*fi); headers.clear(); while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); if(line.empty()) break; headers += "\n"; headers += line; } members_t g; for(boost::sregex_iterator i(headers.begin(), headers.end(), re_terms), e; i != e; ++i) { const boost::smatch& what = *i; std::string field = what[1].str(); if(boost::algorithm::iequals(field, "Message-ID")) { std::string id = what[2].str(); boost::algorithm::trim(id); std::pair<message_id_set::iterator, bool> result = message_id.insert(id); //skip this duplicate message if(!result.second) { g.clear(); break; } } else if(boost::algorithm::iequals(field, "From") || boost::algorithm::iequals(field, "To") || boost::algorithm::iequals(field, "Cc")) { std::string data = what[2].str(); boost::replace_all(data, "\n", ""); boost::replace_all(data, "\r", ""); for(boost::sregex_iterator j(data.begin(), data.end(), re_email), e; j != e; ++j) { std::string name = (*j)[1].str(); if(name.empty()) name = (*j)[2].str(); std::string email_address = (*j)[3].str(); boost::algorithm::to_lower(email_address); boost::algorithm::trim(name); std::pair<email_id_bimap::map_by<email>::iterator, bool> result = email_id.by<email>().insert( email_id_bimap::map_by<email>::value_type(email_address, email_id.size())); if(result.second) std::cout << "@" << std::flush; if(!name.empty() && boost::to_lower_copy(name) != email_address) em[email_address].insert(name); g.insert(result.first->second); } } } if(g.empty()) { continue; } initial_group_partition_map::iterator r = igpm.find(g); if(r == igpm.end()) { connectedness_graph::vertex_descriptor node = gr::add_vertex(cg); cg[node].members = g; cg[node].weight = 1; igpm.insert(r, std::make_pair(g, node)); } else { connectedness_graph::vertex_descriptor node = r->second; cg[node].weight++; } std::cout << ". " << std::flush; } std::cout << std::endl; if(fs::exists(to)) fs::remove(to); fs::ofstream out(to); std::cout << "saving data to " << to.file_string(); for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { out << (unsigned long long)cg[*i].weight; for(members_t::iterator j = cg[*i].members.begin(); j != cg[*i].members.end(); ++j) { out << "\t" << email_id.by<bit>().equal_range(*j).first->second; } out << std::endl; } out << "-" << std::endl; for(entity_map::iterator i = em.begin(); i != em.end(); ++i) { out << i->first; for(std::set<std::string>::iterator k = i->second.begin(); k != i->second.end(); ++k) { out << "\t" << *k; } out << std::endl; } return 0; }
static bool test_recursive(ilzham &lzham_dll, const char *pPath, comp_options options) { string_array files; if (!find_files(pPath, "*", files, true)) { print_error("Failed finding files under path \"%s\"!\n", pPath); return false; } uint total_files_compressed = 0; uint64 total_source_size = 0; uint64 total_comp_size = 0; #ifdef WIN32 MEMORYSTATUS initial_mem_status; GlobalMemoryStatus(&initial_mem_status); #endif timer_ticks start_tick_count = timer::get_ticks(); const int first_file_index = 0; uint unique_id = static_cast<uint>(timer::get_init_ticks()); char cmp_file[256], decomp_file[256]; #ifdef _XBOX sprintf(cmp_file, "e:\\__comp_temp_%u__.tmp", unique_id); sprintf(decomp_file, "e:\\__decomp_temp_%u__.tmp", unique_id); #else sprintf(cmp_file, "__comp_temp_%u__.tmp", unique_id); sprintf(decomp_file, "__decomp_temp_%u__.tmp", unique_id); #endif for (uint file_index = first_file_index; file_index < files.size(); file_index++) { const std::string &src_file = files[file_index]; printf("***** [%u of %u] Compressing file \"%s\" to \"%s\"\n", 1 + file_index, (uint)files.size(), src_file.c_str(), cmp_file); FILE *pFile = fopen(src_file.c_str(), "rb"); if (!pFile) { printf("Skipping unreadable file \"%s\"\n", src_file.c_str()); continue; } fseek(pFile, 0, SEEK_END); int64 src_file_size = _ftelli64(pFile); fclose(pFile); if (!ensure_file_is_writable(cmp_file)) { print_error("Unable to create file \"%s\"!\n", cmp_file); return false; } comp_options file_options(options); if (options.m_randomize_params) { file_options.m_comp_level = static_cast<lzham_compress_level>(rand() % LZHAM_TOTAL_COMP_LEVELS); file_options.m_dict_size_log2 = LZHAM_MIN_DICT_SIZE_LOG2 + (rand() % (LZHAMTEST_MAX_POSSIBLE_DICT_SIZE - LZHAM_MIN_DICT_SIZE_LOG2 + 1)); file_options.m_max_helper_threads = rand() % (LZHAM_MAX_HELPER_THREADS + 1); file_options.m_unbuffered_decompression = (rand() & 1) != 0; #if !LZHAMTEST_NO_RANDOM_EXTREME_PARSING file_options.m_extreme_parsing = (rand() & 1) != 0; #endif file_options.m_force_polar_codes = (rand() & 1) != 0; file_options.m_deterministic_parsing = (rand() & 1) != 0; file_options.print(); } bool status = compress_streaming(lzham_dll, src_file.c_str(), cmp_file, file_options); if (!status) { print_error("Failed compressing file \"%s\" to \"%s\"\n", src_file.c_str(), cmp_file); return false; } if (file_options.m_verify_compressed_data) { printf("Decompressing file \"%s\" to \"%s\"\n", cmp_file, decomp_file); if (!ensure_file_is_writable(decomp_file)) { print_error("Unable to create file \"%s\"!\n", decomp_file); return false; } status = decompress_file(lzham_dll, cmp_file, decomp_file, file_options); if (!status) { print_error("Failed decompressing file \"%s\" to \"%s\"\n", src_file.c_str(), decomp_file); return false; } printf("Comparing file \"%s\" to \"%s\"\n", decomp_file, src_file.c_str()); if (!compare_files(decomp_file, src_file.c_str())) { print_error("Failed comparing decompressed file data while compressing \"%s\" to \"%s\"\n", src_file.c_str(), cmp_file); return false; } else { printf("Decompressed file compared OK to original file.\n"); } } int64 cmp_file_size = 0; pFile = fopen(cmp_file, "rb"); if (pFile) { fseek(pFile, 0, SEEK_END); cmp_file_size = _ftelli64(pFile); fclose(pFile); } total_files_compressed++; total_source_size += src_file_size; total_comp_size += cmp_file_size; #ifdef WIN32 MEMORYSTATUS mem_status; GlobalMemoryStatus(&mem_status); #ifdef _XBOX const int64 bytes_allocated = initial_mem_status.dwAvailPhys - mem_status.dwAvailPhys; #else const int64 bytes_allocated = initial_mem_status.dwAvailVirtual- mem_status.dwAvailVirtual; #endif printf("Memory allocated relative to first file: %I64i\n", bytes_allocated); #endif printf("\n"); } timer_ticks end_tick_count = timer::get_ticks(); double total_elapsed_time = timer::ticks_to_secs(end_tick_count - start_tick_count); printf("Test successful: %f secs\n", total_elapsed_time); printf("Total files processed: %u\n", total_files_compressed); printf("Total source size: " QUAD_INT_FMT "\n", total_source_size); printf("Total compressed size: " QUAD_INT_FMT "\n", total_comp_size); remove(cmp_file); remove(decomp_file); return true; }
int main(int argc, char* argv[]) { std::string server; std::string port; std::string user; std::string folder; std::string password; std::vector<fs::path> from; std::vector<fs::path> entity; fs::path to; //[Gmail]/Sent Mail po::options_description general_options("General"); general_options.add_options() ("help", "list options"); po::options_description file_options("Load"); file_options.add_options() ("save-raw", po::value<fs::path>(&to), "path to save the data (after download phase)"); po::options_description download_options("Download"); download_options.add_options() ("server", po::value<std::string>(&server), "imap server dns/ip") ("port", po::value<std::string>(&port)->default_value("993"), "imap port") ("folder", po::value<std::string>(&folder)->default_value("Sent"), "imap folder") ("user", po::value<std::string>(&user), "imap username") ("password", po::value<std::string>(&password), "imap password (will ask if not specified)"); po::options_description run_options("Run"); po::options_description all_options("Email Topology Options"); all_options .add(general_options) .add(file_options) .add(download_options); if(argc < 2) { std::cout << all_options << std::endl; return 1; } po::variables_map vm; try { int options_style = po::command_line_style::default_style; po::store(po::parse_command_line(argc, argv, all_options, options_style), vm); po::notify(vm); } catch(std::exception& e) { std::cout << all_options << std::endl; std::cout << "Command line parsing failed: " << e.what() << std::endl; return 1; } if(vm.count("help")) { std::cout << all_options << std::endl; return 1; } email_id_bimap email_id; connectedness_graph cg; entity_map em; initial_group_partition_map igpm; if(!vm.count("save-raw")) { std::cout << "you must specify --save-raw with a file name" << std::endl; return 1; } if(!vm.count("password")) { password = getpass("Password: "******"missing server for download" << std::endl; return 1; } if(user.empty()) { std::cout << "missing user for download" << std::endl; return 1; } if(password.empty()) { std::cout << "missing user for download" << std::endl; return 1; } //this is our network block, downloads all messages headers try { std::cout << "downloading " << folder << " from " << server << std::endl; //use to dedupe if there are dupes message_id_set message_id; typedef boost::function<void (const std::string&, const std::list<std::string>& args)> untagged_handler; std::string pending_tag = "* "; std::list<std::string> pending_command; pending_command.push_back("WAIT_FOR_ACK"); untagged_handler pending_handler; unsigned int command_id = 0; //The sequence of imap commands we want to run std::list<std::list<std::string> > commands; std::list<untagged_handler> handlers; handlers.push_back(log_handler()); commands.push_back(std::list<std::string>()); std::ostringstream login_os; login_os << "LOGIN \"" << user << "\" {" << password.size() << "}"; commands.back().push_back(login_os.str()); commands.back().push_back(password); handlers.push_back(log_handler()); commands.push_back(std::list<std::string>()); commands.back().push_back("LIST \"\" *"); handlers.push_back(log_handler()); commands.push_back(std::list<std::string>()); commands.back().push_back("SELECT \"" + folder + "\""); handlers.push_back(header_handler(email_id, cg, em, message_id, igpm)); commands.push_back(std::list<std::string>()); commands.back().push_back("FETCH 1:* (BODY.PEEK[HEADER.FIELDS (MESSAGE-ID FROM TO CC)])"); commands.push_back(std::list<std::string>()); handlers.push_back(log_handler()); commands.back().push_back("LOGOUT"); //open ssl connection to the server, no cert checking asio::io_service io_service; asio::ip::tcp::resolver resolver(io_service); asio::ip::tcp::resolver::query query(server, port); asio::ip::tcp::resolver::iterator iterator = resolver.resolve(query); asio::ssl::context context(io_service, asio::ssl::context::sslv23); context.set_verify_mode(asio::ssl::context::verify_none); asio::ssl::stream<asio::ip::tcp::socket> socket(io_service, context); socket.lowest_layer().connect(*iterator); socket.handshake(asio::ssl::stream_base::client); asio::streambuf buf; while(true) { //read the next line of data std::size_t line_length = asio::read_until(socket, buf, re_crlf); std::string line( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + line_length); buf.consume(line_length); boost::match_results<std::string::iterator> what; std::size_t initial = 0; std::list<std::string> args; //the line may be split into segments with chunks of data embedded, this is the case //for bodies or message header blocks that are returned, we only handle this case if it //comes in untagged response (*) not a continuation (+), i think that is normal while(regex_search(line.begin() + initial, line.end(), what, re_byte_buffer, boost::match_default)) { unsigned int bytes = boost::lexical_cast<unsigned int>(what[1].str()); if(buf.size() < bytes) asio::read(socket, buf, asio::transfer_at_least(bytes - buf.size())); args.push_back( std::string( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + bytes)); buf.consume(bytes); line.resize(what[1].second - line.begin()); initial = line.size(); //read the next line of data line_length = asio::read_until(socket, buf, re_crlf); line += std::string( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + line_length); buf.consume(line_length); } if(boost::algorithm::starts_with(line, pending_tag)) { //if the command is being completed, then we will go here, bail out if the response wasn't ok if(!boost::algorithm::starts_with(line, pending_tag + "OK")) { std::cout << line; throw std::runtime_error("command failed"); } //pull the next command off the list pending_tag = "A" + boost::lexical_cast<std::string>(command_id++) + " "; if(commands.size() == 0) break; pending_handler = handlers.front(); pending_command = commands.front(); commands.pop_front(); handlers.pop_front(); //send the command along with any data arguments std::cout << pending_tag << pending_command.front() << std::endl; asio::write(socket, asio::buffer(pending_tag.data(), pending_tag.size())); for(std::list<std::string>::iterator i = pending_command.begin(); i != pending_command.end(); ++i) { if(i != pending_command.begin()) { //print the continuation response std::size_t line_length = asio::read_until(socket, buf, re_crlf); std::string line( asio::buffers_begin(buf.data()), asio::buffers_begin(buf.data()) + line_length); buf.consume(line_length); std::cout << line << std::flush; if(!boost::algorithm::starts_with(line, "+ ")) { throw std::runtime_error("bad response when writing extra data"); } } else { //print it out as well (but not the args) std::cout << *i << std::endl; } asio::write(socket, asio::buffer(i->data(), i->size())); asio::write(socket, asio::buffer("\r\n", 2)); } } else if(boost::algorithm::starts_with(line, "* ")) { //if there is a registered handler, dispatch to it if(pending_handler) pending_handler(line, args); } else { throw std::runtime_error("unrecognized response"); } } } catch (std::exception& e) { std::cout << "Exception: " << e.what() << std::endl; return 1; } std::cout << std::endl; if(to.empty()) { std::cout << "Missing output file for save" << std::endl; return 1; } if(fs::exists(to)) fs::remove(to); fs::ofstream out(to); std::cout << "saving data to " << to.file_string(); for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { out << (unsigned long long)cg[*i].weight; for(members_t::iterator j = cg[*i].members.begin(); j != cg[*i].members.end(); ++j) { out << "\t" << email_id.by<bit>().equal_range(*j).first->second; } out << std::endl; } out << "-" << std::endl; for(entity_map::iterator i = em.begin(); i != em.end(); ++i) { out << i->first; for(std::set<std::string>::iterator k = i->second.begin(); k != i->second.end(); ++k) { out << "\t" << *k; } out << std::endl; } return 0; }
int main(int argc, char* argv[]) { std::vector<fs::path> from; std::vector<fs::path> entity; std::string ignore_string, save_base; unsigned int threshold; unsigned int person_threshold; bool no_individuals; bool remove_most_common; std::vector<unsigned int> save_at_v; std::set<unsigned int> save_at; //[Gmail]/Sent Mail po::options_description general_options("General"); general_options.add_options() ("help", "list options"); po::options_description file_options("Load"); file_options.add_options() ("ignore", po::value<std::string>(&ignore_string)->default_value("@lists\\.|@googlegroups\\.|@yahoogroups\\.|@mailman\\.|@facebookmail\\.|noreply|do[-_]not[-_]reply|^buzz\\+"), "ignore messages with a recipient matching this expression") ("entity-raw", po::value<std::vector<fs::path> >(&entity), "paths to load data ONLY for entities") ("load-raw", po::value<std::vector<fs::path> >(&from), "paths to load data from"); po::options_description run_options("Export Options"); run_options.add_options() ("save", po::value<std::string>(&save_base), "base path to save the data at") ("remove-most-common", po::value<bool>(&remove_most_common)->default_value(1), "remove the most common individual (owner)") ("no-individuals", po::value<bool>(&no_individuals)->default_value(0), "ignore individuals") ("threshold", po::value<unsigned int>(&threshold)->default_value(1), "minimum mails for group") ("person-threshold", po::value<unsigned int>(&person_threshold)->default_value(2), "minimum mails for person"); po::options_description all_options("Email Topology Options"); all_options .add(general_options) .add(file_options) .add(run_options); if(argc < 2) { std::cout << all_options << std::endl; return 1; } po::variables_map vm; try { int options_style = po::command_line_style::default_style; po::store(po::parse_command_line(argc, argv, all_options, options_style), vm); po::notify(vm); } catch(std::exception& e) { std::cout << all_options << std::endl; std::cout << "Command line parsing failed: " << e.what() << std::endl; return 1; } if(vm.count("help")) { std::cout << all_options << std::endl; return 1; } std::copy(save_at_v.begin(), save_at_v.end(), std::inserter(save_at, save_at.end())); email_id_bimap email_id; connectedness_graph cg; initial_group_partition_map igpm; entity_map em; if(!vm.count("load-raw")) { std::cout << "must load something" << std::endl; return 1; } if(!vm.count("save")) { std::cout << "must save something" << std::endl; return 1; } std::size_t max_id = 0; std::vector<char> buffer(128 * 1024); try { boost::regex re_ignore(ignore_string); boost::regex re_loader("([^\t]+)"); std::cout << "resolving entities" << std::endl; for(std::vector<fs::path>::iterator i = entity.begin(); i != entity.end(); ++i) { if(!fs::exists(*i)) throw std::runtime_error(std::string("input file not found: ") + i->file_string()); std::cout << "loading " << i->file_string(); fs::ifstream in(*i); //we don't care about messages here while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); if(line == "-") { break; } bool first = true; for(boost::sregex_iterator j(line.begin(), line.end(), re_loader), e; j != e; ++j) { if(first) { first = false; } else { std::string email_address = (*j)[0].str(); if(regex_search(email_address, re_ignore)) { continue; } std::pair<email_id_bimap::map_by<email>::iterator, bool> result = email_id.by<email>().insert( email_id_bimap::map_by<email>::value_type(email_address, email_id.size())); if(result.second) std::cout << "@" << std::flush; } } } while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); std::string email_address; bool first = true; for(boost::sregex_iterator j(line.begin(), line.end(), re_loader), e; j != e; ++j) { if(first) { first = false; email_address = (*j)[0].str(); if(regex_search(email_address, re_ignore)) { break; } } else { std::string name = (*j)[0].str(); try { em[email_address].insert(name); } catch(std::exception& e) { std::cout << "err missing: " << email_address << std::endl; throw; } } } } std::cout << std::endl; } resolve_entities(em, email_id); for(std::vector<fs::path>::iterator i = from.begin(); i != from.end(); ++i) { if(!fs::exists(*i)) throw std::runtime_error(std::string("input file not found: ") + i->file_string()); std::cout << "loading " << i->file_string(); fs::ifstream in(*i); while(in.good()) { in.getline(&buffer[0], buffer.size()); std::string line = &buffer[0]; boost::algorithm::trim(line); if(line == "-") { break; } members_t g; unsigned int count = 0; bool first = true; for(boost::sregex_iterator j(line.begin(), line.end(), re_loader), e; j != e; ++j) { if(first) { first = false; std::string number = (*j)[0].str(); count = boost::lexical_cast<unsigned int>(number); } else { std::string email_address = (*j)[0].str(); if(regex_search(email_address, re_ignore)) { g.clear(); continue; } std::pair<email_id_bimap::map_by<email>::iterator, bool> result = email_id.by<email>().insert( email_id_bimap::map_by<email>::value_type(email_address, email_id.size())); if(result.second) std::cout << "@" << std::flush; g.insert(result.first->second); } } if(g.empty()) { //no emails? wtfs continue; } initial_group_partition_map::iterator r = igpm.find(g); if(r == igpm.end()) { connectedness_graph::vertex_descriptor node = gr::add_vertex(cg); cg[node].members = g; cg[node].weight = count; igpm.insert(r, std::make_pair(g, node)); } else { connectedness_graph::vertex_descriptor node = r->second; cg[node].weight += count; } std::cout << "." << std::flush; } //no need to load em std::cout << std::endl; } max_id = email_id.size(); } catch(std::exception& e) { std::cout << "failed to load data: " << e.what() << std::endl; return 1; } std::map<unsigned int, score_t> ppl; for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { if(cg[*i].weight >= threshold) { for(members_t::iterator j = cg[*i].members.begin(); j != cg[*i].members.end(); ++j) { ppl[*j] += cg[*i].weight; } ++i; } else { connectedness_graph::vertex_iterator to_erase = i++; gr::clear_vertex(*to_erase, cg); gr::remove_vertex(*to_erase, cg); } } //remove the owner, todo, this is evil because now there are dupe groups if A was owner and A B C and B C existed if(!ppl.empty()) { if(remove_most_common) { unsigned int max_person = ppl.begin()->first; score_t max_val = ppl.begin()->second; for(std::map<unsigned int, score_t>::iterator j = ppl.begin(); j != ppl.end(); ++j) { if(j->second > max_val) { max_val = j->second; max_person = j->first; } } for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { cg[*i].members.erase(max_person); if(cg[*i].members.empty()) { connectedness_graph::vertex_iterator to_delete = i; ++i; gr::clear_vertex(*to_delete, cg); gr::remove_vertex(*to_delete, cg); } else { ++i; } } } for(std::map<unsigned int, score_t>::iterator j = ppl.begin(); j != ppl.end();) { if(j->second >= person_threshold) { std::map<unsigned int, score_t>::iterator to_delete = j++; ppl.erase(to_delete); } else { ++j; } } for(std::map<unsigned int, score_t>::iterator j = ppl.begin(); j != ppl.end(); ++j) { for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { cg[*i].members.erase(j->first); if(cg[*i].members.empty()) { connectedness_graph::vertex_iterator to_delete = i; ++i; gr::clear_vertex(*to_delete, cg); gr::remove_vertex(*to_delete, cg); } else { ++i; } } } } if(no_individuals) { for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second;) { if(cg[*i].members.size() > 1) { ++i; } else { connectedness_graph::vertex_iterator to_erase = i++; gr::clear_vertex(*to_erase, cg); gr::remove_vertex(*to_erase, cg); } } } //normalize group weights for large groups for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { if(cg[*i].members.size() < 20) continue; cg[*i].weight *= score_t(20) / cg[*i].members.size(); } unsigned int vertex_number = 0; for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { cg[*i].index = vertex_number++; } std::cout << "converting to person graph" << std::endl; people_graph pg; std::map<unsigned int, people_graph::vertex_descriptor> remaining_people; for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { group& g = cg[*i]; for(members_t::const_iterator j = g.members.begin(); j != g.members.end(); ++j) { //if there is a new person represented add them to the map std::pair<std::map<unsigned int, people_graph::vertex_descriptor>::iterator, bool> res = remaining_people.insert(std::make_pair(*j, people_graph::vertex_descriptor())); if(res.second) { res.first->second = gr::add_vertex(pg); person& p = pg[res.first->second]; p.id = res.first->first; p.name = email_id.by<bit>().equal_range(p.id).first->second; } } } for(connectedness_graph::vertex_iterator i = gr::vertices(cg).first; i != gr::vertices(cg).second; ++i) { group& g = cg[*i]; for(members_t::const_iterator j = g.members.begin(); j != g.members.end(); ++j) { members_t::const_iterator k = j; for(++k; k != g.members.end(); ++k) { //duplicates eliminated by setS type container people_graph::edge_descriptor l = gr::add_edge(remaining_people[*j], remaining_people[*k], pg).first; edge& e = pg[l]; e.weight += g.weight; } } } fs::path path(save_base); if(fs::exists(path)) fs::remove(path); fs::ofstream out(path); gr::dynamic_properties dp; dp.property("label", get(&person::name, pg)); dp.property("weight", gr::get(&edge::weight, pg)); gr::write_graphml(out, pg, gr::get(&person::id, pg), dp, false); return 0; }