void ORD::buildGraph() { Graph::GraphCreatorFile creator(this->filename); Graph::GraphUtil util; Graph::GraphProperties properties; int wr; int size; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &wr); G = creator.create_weighted_mutable_graph(); properties.make_canonical(G); char compname[512]; if (!properties.is_connected(G)) { Graph::GraphReaderWriterFactory factory; Graph::VertexWeightedGraph *H; // Since we are going to proceed with processing largest // component , set a mark as not original and write largest // component into a file. this->set_original(false); vector<list<int> *> members; int comp; comp = util.find_all_components(G, &members); DEBUG("found %d components\n", comp); int i = 0; int gsize = 0; int index = 0; for (int j = 0; j < comp; j++) { i = members[j]->size(); if (i > gsize) { gsize = i; index = j; sprintf(compname, "%s_%d_comp", filename.c_str(), gsize); DEBUG("new larger comp : %d\n", gsize); } } this->set_filename(compname); H = creator.create_component(G, members[index], true); delete G; G = H; // Change the label into 1-base DIMACS format int j = 0; int gs = G->get_num_nodes(); Graph::Node *n; for (int i = 0 ; i < gs; i++) { n = G->get_node(i); n->set_label(i+1); } } }
// Checks create_component(): the graph built from the member list returned by
// find_component(wmg, 25, ...) must have as many nodes as the list, and node i
// of the new graph must match the i-th listed node of the source graph in
// label, degree and (sorted) neighbor labels.
TEST_F(GraphCreatorFileTest, testCreateComponent) {
    Graph::WeightedMutableGraph *wmg;
    Graph::WeightedMutableGraph *wmg_sub;
    Graph::GraphUtil util;

    creator->set_file_name("../data/1et.64.txt");
    creator->set_graph_type("DIMACS");
    wmg = creator->create_weighted_mutable_graph();

    list<int> f;
    util.find_component(wmg, 25, &f);
    wmg_sub = creator->create_component(wmg, &f, true);
    EXPECT_EQ(f.size(), wmg_sub->get_num_nodes()) ;

    vector<Graph::Node> wmg_nodes = wmg->get_nodes();
    vector<Graph::Node> wmg_sub_nodes = wmg_sub->get_nodes();

    const int n = wmg_sub->get_num_nodes();
    list<int>::iterator it = f.begin();

    // The size check above is non-fatal, so also stop when the member list is
    // exhausted rather than dereferencing f.end().
    for (int i = 0; i < n && it != f.end(); ++i, ++it) {
        EXPECT_EQ(wmg_nodes[*it].get_label(), wmg_sub_nodes[i].get_label()) ;
        EXPECT_EQ(wmg_nodes[*it].get_degree(), wmg_sub_nodes[i].get_degree()) ;

        list<int> wmg_nbrs = wmg_nodes[*it].get_nbrs();
        list<int> wmg_sub_nbrs = wmg_sub_nodes[i].get_nbrs();
        wmg_nbrs.sort();
        wmg_sub_nbrs.sort();

        // Walk both neighbor lists in lock step.  Both ends are checked
        // because the degree comparison above does not abort the test, so the
        // two lists may differ in length.
        list<int>::iterator jt = wmg_nbrs.begin();
        list<int>::iterator kt = wmg_sub_nbrs.begin();
        for (; jt != wmg_nbrs.end() && kt != wmg_sub_nbrs.end(); ++jt, ++kt) {
            ASSERT_EQ(wmg_nodes[*jt].get_label(), wmg_sub_nodes[*kt].get_label()) ;
        }
    }
}
// Checks create_rec_all_components(): it must return as many graphs as
// find_all_components() reports components, and the first returned graph whose
// node count equals the size of members[3] must carry labels equal to the
// listed member ids plus one.
TEST_F(GraphCreatorFileTest, testCreateRecAllComponents) {
    Graph::GraphUtil util;
    Graph::WeightedMutableGraph *wmg;

    creator->set_file_name("../data/1et.64.txt");
    creator->set_graph_type("DIMACS");
    wmg = creator->create_weighted_mutable_graph();

    vector<list<int> *> members;
    int x = util.find_all_components(wmg, &members);
    EXPECT_EQ(x, members.size()) ;

    list<Graph::WeightedMutableGraph *> cmembers;
    cmembers = creator->create_rec_all_components(wmg, true);
    EXPECT_EQ(cmembers.size(), members.size()) ;

    // members[3] is indexed below; fail the test instead of reading out of
    // bounds when fewer than four components were found.
    ASSERT_GT(members.size(), static_cast<size_t>(3)) ;
    int mem_size = members[3]->size();

    list<Graph::WeightedMutableGraph *>::iterator giter;
    Graph::WeightedMutableGraph cmg;
    vector<Graph::Node> nodes;

    // Pick the first component graph with the same number of nodes as members[3].
    for (giter = cmembers.begin(); giter != cmembers.end(); ++giter) {
        if (mem_size == (*giter)->get_num_nodes()) {
            nodes = (*giter)->get_nodes();
            break;
        }
    }

    // If no graph matched, 'nodes' is empty; abort before indexing into it.
    ASSERT_GE(nodes.size(), members[3]->size()) ;

    list<int>::iterator it;
    int i = 0;
    for (it = members[3]->begin(); it != members[3]->end(); ++it) {
        EXPECT_EQ((*it), nodes[i].get_label() - 1);
        i++;
    }
}
int main(int argc, char **argv){ string infile; string outfilename; string outprefix; string apspinputfilename; string lcc_apspinputfilename; ofstream outfile; ofstream timing_file; bool record_timings = false; bool file_append = false; bool run_largest_cc = true; string intype ("edge"); std::map<string, bool> req_methods; std::map<string, bool> val_methods; ORB_t t1, t2; int spectrum_spread = 0; create_map(allowed_methods, val_methods); parse_options(argc, argv, infile, intype, outfilename, outprefix, req_methods, record_timings, file_append, run_largest_cc, &spectrum_spread, apspinputfilename, lcc_apspinputfilename); if(outfilename.length() == 0){ if(outprefix.length() != 0){ outfilename = outprefix + ".stats"; } else { outfilename = "graph-stats.txt"; } } if(outprefix.length() == 0){ outprefix = infile; } // we'd like higher precision when printing values std::cout.precision(10); #ifdef MPI_VERSION MPI_Init(&argc, &argv); int myrank; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); if(myrank == 0){ #endif cout << "done parsing options" << endl; cout << "Input file: " << infile << endl; cout << "Input type: " << intype << endl; cout << "Output file: " << outfilename << endl; cout << "Appending : "; cout << std::boolalpha << file_append << endl; cout << "Methods :"; for(map<string, bool>::iterator it = req_methods.begin(); it != req_methods.end(); ++it){ cout << " " << it->first; if(val_methods[it->first] != true){ cerr << "Error: " << it->first << " is not a valid method! 
" << endl; } } cout << endl; cout << "Calibrating timers" << endl; #ifdef MPI_VERSION } // main #endif ORB_calibrate(); // let's do some calculations Graph::Graph *g = new(Graph::Graph); Graph::GraphReader gr; Graph::GraphProperties gp; Graph::GraphUtil gu; #ifdef MPI_VERSION //int myrank; //MPI_Comm_rank(MPI_COMM_WORLD, &myrank); if(myrank == 0){ #endif // Set up output streams if(file_append == false){ outfile.open(outfilename.c_str()); } else { outfile.open(outfilename.c_str(), ios_base::out | ios_base::app); } if(!outfile.is_open()){ cerr << "Error opening " << outfilename << " for writing, exiting" << endl; exit(1); } #ifdef MPI_VERSION } #endif // Read in the graph and start recording things to output streams cout << "Reading graph" << endl; ORB_read(t1); if(gr.read_graph(g, infile, intype, false) == -1){ exit(1); } ORB_read(t2); if(outfile.tellp() == 0){ outfile << "filename " << infile << endl; outfile << "input_num_nodes " << g->get_num_nodes() << endl; outfile << "input_num_edges " << g->get_num_edges() << endl; } if(record_timings){ string of = outfilename + ".timings"; #ifdef MPI_VERSION if(0 == myrank){ #endif if(file_append == false){ timing_file.open(of.c_str()); } else { timing_file.open(of.c_str(), ios_base::out | ios_base::app); } if(!timing_file.is_open()){ cerr << "Error opening " << timing_file << " for writing, exiting" << endl; exit(1); } if(false == file_append){ outfile << "timing_file " << of << endl; } #ifdef MPI_VERSION } #endif } print_time(timing_file, "Time(read_graph)", t1, t2); if(apspinputfilename.length() != 0){ cout << "Reading APSP matrix from " << apspinputfilename << endl; vector< vector<int> > *apsp_dists = new vector< vector<int> >; ORB_read(t1); read_apsp_matrix(apspinputfilename, *apsp_dists); ORB_read(t2); print_time(timing_file, "Time(read_apsp_matrix)", t1, t2); g->set_shortest_path_dist(apsp_dists); } outfile.precision(16); vector<int> components; ORB_read(t1); gu.label_all_components(g, &components); ORB_read(t2); 
print_time(timing_file, "Time(label_all_components)", t1, t2); bool is_connected = gp.is_connected(g); cout << "Connected components: " << g->get_num_connected_components() << endl; //cout << "Graph is connected: " << std::boolalpha << is_connected << endl; run_all_methods(g, outfile, timing_file, outprefix, req_methods, file_append, spectrum_spread); outfile.close(); timing_file.close(); // some algorithms only make sense to run on a connected graph/component if(not is_connected and run_largest_cc){ // run everything against the other algorithms cout << "Graph is not connected, re-running stats on largest connected component" << endl; outfilename = outprefix + ".largest_component.stats"; if(file_append == false){ outfile.open(outfilename.c_str()); } else { outfile.open(outfilename.c_str(), ios_base::out | ios_base::app); } if(!outfile.is_open()){ cerr << "Error opening " << outfilename << " for writing, exiting" << endl; exit(1); } // get the largest component Graph::Graph *largest_component = gu.get_largest_component_graph(g); cerr << "Deleting g" << endl; delete(g); // delete g here to save on memory cerr << "g deleted" << endl; if(outfile.tellp() == 0){ #ifdef MPI_VERSION if(0 == myrank){ #endif outfile << "largest_component_from " << infile << endl; outfile << "input_num_nodes " << largest_component->get_num_nodes() << endl; outfile << "input_num_edges " << largest_component->get_num_edges() << endl; #ifdef MPI_VERSION } #endif } if(record_timings){ string of = outfilename + ".timings"; if(file_append == false){ timing_file.open(of.c_str()); #ifdef MPI_VERSION if(0 == myrank){ #endif outfile << "timing_file " << of << endl; #ifdef MPI_VERSION } #endif } else { timing_file.open(of.c_str(), ios_base::out | ios_base::app); } if(!timing_file.is_open()){ cerr << "Error opening " << timing_file << " for writing, exiting" << endl; exit(1); } } if(lcc_apspinputfilename.length() != 0){ cout << "Reading LCC APSP matrix from " << lcc_apspinputfilename << endl; vector< 
vector<int> > *apsp_dists = new vector< vector<int> >; ORB_read(t1); read_apsp_matrix(lcc_apspinputfilename, *apsp_dists); ORB_read(t2); print_time(timing_file, "Time(read_apsp_matrix)", t1, t2); largest_component->set_shortest_path_dist(apsp_dists); } outprefix = outprefix + ".largest_component"; outfile.precision(16); cerr << "Running methods on largest component" << endl; run_all_methods(largest_component, outfile, timing_file, outprefix, req_methods, file_append, spectrum_spread); outfile.close(); timing_file.close(); } #ifdef MPI_VERSION MPI_Finalize(); #endif exit(0); } // main
void run_all_methods(Graph::Graph *g, ofstream &outfile, ofstream &timing_file, string outprefix, std::map<string, bool> req_methods, bool &file_append, int spectrum_spread){ Graph::GraphReader gr; Graph::GraphProperties gp; Graph::GraphUtil gu; ORB_t t1, t2; double global_cc, avg_cc, assortativity; vector<double> local_cc, freq_ecc, norm_hops, eigen_spectrum; float edge_density, avg_degree, eff_diam; vector<int> deg_dist, ecc; int degeneracy, diam; vector<int> k_cores; vector<list<int> *> components; double avg_path_length; int xmin; double prob, lambda, alpha, KS, max_delta; vector<vector<double> > delta; vector<double> betweenness; Graph::Graph *largest_component; vector< vector<int> > shortest_path_distances; cout << "Simplifying graph" << endl; ORB_read(t1); gp.make_simple(g); ORB_read(t2); print_time(timing_file, "Time(make_simple)", t1, t2); int num_components = g->get_num_connected_components(); if(false == file_append){ outfile << "connected_components " << num_components << endl; outfile << "net_num_nodes " << g->get_num_nodes() << endl; outfile << "net_num_edges " << g->get_num_edges() << endl; } if(req_methods["edge_density"] == true){ cout << "Calculating edge density" << endl; ORB_read(t1); gp.edge_density(g, edge_density); ORB_read(t2); print_time(timing_file, "Time(edge_density)", t1, t2); outfile << "edge_density " << edge_density << endl; } if(req_methods["avg_degree"] == true){ cout << "Calculating average degree" << endl; ORB_read(t1); gp.avg_degree(g, avg_degree); ORB_read(t2); print_time(timing_file, "Time(average_degree)", t1, t2); outfile << "avg_degree " << avg_degree << endl; } if(req_methods["degree_dist"] == true){ cout << "Calculating degree distribution" << endl; ORB_read(t1); gp.deg_dist(g, deg_dist); ORB_read(t2); print_time(timing_file, "Time(degree_distribution)", t1, t2); string of = outprefix + ".deg_dist"; write_degree_distribution(of, deg_dist); outfile << "degree_distribution " << of << endl; } if(num_components != 1){ 
if(req_methods["component_sizes"] == true){ cout << "Calculating component sizes" << endl; ORB_read(t1); gu.find_all_components(g, &components); ORB_read(t2); print_time(timing_file, "Time(component_sizes)", t1, t2); string of = outprefix + ".component_sizes"; write_components(of, components); outfile << "component_sizes " << of << endl; } } if(req_methods["assortativity"] == true){ cout << "Calculating degree assortativity" << endl; ORB_read(t1); gp.deg_assortativity(g, assortativity); ORB_read(t2); print_time(timing_file, "Time(assortativity)", t1, t2); outfile << "assortativity " << assortativity << endl; } if((req_methods["degeneracy"] == true) || (req_methods["k_cores"] == true)){ cout << "Calculating k_cores and degeneracy" << endl; ORB_read(t1); degeneracy = gu.find_kcore(g, &k_cores); ORB_read(t2); print_time(timing_file, "Time(find_kcore)", t1, t2); outfile << "degeneracy " << degeneracy << endl; if(req_methods["k_cores"] == true){ string of = outprefix + ".kcores"; outfile << "kcore file " << of << endl; write_kcores(of, k_cores); } } if((req_methods["global_cc"] == true) || (req_methods["local_ccs"] == true) || (req_methods["avg_cc"] == true)){ cout << "Calculating clustering coefficients" << endl; ORB_read(t1); gp.clustering_coefficients(g, global_cc, avg_cc, local_cc); ORB_read(t2); print_time(timing_file, "Time(clustering_coeffecients)", t1, t2); if(req_methods["global_cc"] == true){ outfile << "global_clustering_coefficient " << global_cc << endl; } if(req_methods["avg_cc"] == true){ outfile << "average_clustering_coefficient " << avg_cc << endl; } if(req_methods["local_ccs"] == true){ string of = outprefix + ".local_ccs"; outfile << "local_ccs file " << of << endl; write_local_ccs(of, local_cc); } } if(req_methods["shortest_paths"] == true){ cout << "Calculating shortest paths" << endl; ORB_read(t1); gp.paths_dijkstra_all(g, shortest_path_distances); ORB_read(t2); print_time(timing_file, "Time(shortest_paths_dijkstra)", t1, t2); } #ifdef HAS_BOOST 
if((req_methods["shortest_paths_boost"] == true)){ cout << "Creating BOOST representation of g" << endl; ORB_read(t1); gu.populate_boost(g); ORB_read(t2); print_time(timing_file, "Time(populate_boost)", t1, t2); cout << "Calculating shortest paths (boost)" << endl; ORB_read(t1); gp.paths_dijkstra_boost_all(g, shortest_path_distances); ORB_read(t2); print_time(timing_file, "Time(shortest_paths_dijkstra_boost)", t1, t2); } if(req_methods["betweenness"]){ /* cout << "Creating BOOST representation of g" << endl; ORB_read(t1); gu.populate_boost(g); ORB_read(t2); print_time(timing_file, "Time(populate_boost)", t1, t2); */cout << "Calculating betweeneess centrality" << endl; ORB_read(t1); gp.betweenness_centrality(g, betweenness); ORB_read(t2); print_time(timing_file, "Time(betweenness_centrality)",t1,t2); string of = outprefix + ".betweenness"; outfile << "betweenness_file " << of << endl; write_betweenness(of, g->get_betweenness_ref()); } #else // ifdef HAS_BOOST cerr << "Error: BOOST support was not compiled, cannot run shortest_paths_boost or betweenness" << endl; #endif // ifdef HAS_BOOST if(num_components == 1){ if(req_methods["eccentricity"] == true){ cout << "Calculating eccentricities" << endl; ORB_read(t1); gp.eccentricity(g, ecc); ORB_read(t2); print_time(timing_file, "Time(eccentricity)",t1,t2); string of = outprefix + ".eccentricity"; outfile << "eccentricity_file " << of << endl; write_eccentricity(of, ecc); } if(req_methods["eccentricity_dist"] == true){ cout << "Calculating distribution of eccentricities" << endl; ORB_read(t1); gp.eccentricity_dist(g, ecc, freq_ecc); ORB_read(t2); print_time(timing_file, "Time(eccentricity distribution)",t1,t2); string of = outprefix + ".eccentricity_dist"; outfile << "eccentricity_dist_file " << of << endl; write_eccentricity_distribution(of, freq_ecc); } } else { cout << "Graph is disconnected - not calculating eccentricities" << endl; } if(req_methods["expansion"] == true){ cout << "Calculating normalized expansion 
(distance distribution) - no self loops allowed" << endl; ORB_read(t1); gp.expansion(g, norm_hops); ORB_read(t2); print_time(timing_file, "Time(expansion)",t1,t2); string of = outprefix + ".expansion"; outfile << "expansion_file " << of << endl; write_expansion(of, norm_hops); } if(req_methods["avg_shortest_path"] == true){ cout << "Calculating average shortest path length" << endl; ORB_read(t1); gp.avg_path_length(g, avg_path_length); ORB_read(t2); print_time(timing_file, "Time(avg_path_length)", t1, t2); outfile << "avg_path_length " << avg_path_length << endl; } if((req_methods["apsp_output"] == true)){ string of = outprefix + ".apsp"; ORB_read(t1); write_apsp_matrix(of, shortest_path_distances); ORB_read(t2); print_time(timing_file, "Time(write_apsp_matrix)", t1, t2); } #ifdef HAS_PETSC if(req_methods["eigen_spectrum"] == true){ //If petsc/slepc are present, initalize those. //If MPI support is added in the future, init MPI before Petsc. Petsc will do it's own MPI //init if MPI isn't already inited. 
#ifdef HAS_SLEPC SlepcInitializeNoArguments(); #elif HAVE_PETSC PetscInitializeNoArguments(); #endif if(spectrum_spread == 0){ spectrum_spread = 3; } cout << "Calculating adjacency matrix eigen spectrum\n"; ORB_read(t1); gp.eigen_spectrum(g, eigen_spectrum, spectrum_spread); ORB_read(t2); print_time(timing_file, "Time(eigen spectrum)",t1,t2); #ifdef MPI_VERSION int myrank; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); if(myrank == 0){ #endif outfile << "eigen_spectrum "; if(eigen_spectrum.size() > 0){ outfile << eigen_spectrum[0]; } for(int idx = 1; idx < eigen_spectrum.size(); idx++){ outfile << ", " << eigen_spectrum[idx]; } outfile << "\n"; #ifdef MPI_VERSION } #endif } #endif // ifdef HAS_PETSC if(num_components == 1){ if(req_methods["delta_hyperbolicity"] == true){ cout << "Calculating delta hyperbolicity" << endl; ORB_read(t1); gp.delta_hyperbolicity(g, max_delta, delta); ORB_read(t2); print_time(timing_file, "Time(delta_hyperbolicity)", t1, t2); //outfile << "delta_hyperbolicity " << max_delta << endl; //for(int idx = 0; idx < delta.size(); idx++){ // for(int jdx = 0; jdx < delta[idx].size(); jdx++){ // outfile << delta[idx][jdx] << " "; // } // outfile << endl; //} string of = outprefix + ".delta_hyp"; write_delta_hyperbolicity(of, delta); outfile << "max_delta_hyperbolicity " << max_delta; } } else { cout << "Graph is disconnected - not calculating delta hyperbolicity" << endl; } if(req_methods["diameter"] == true){ cout << "Calculating diameter" << endl; ORB_read(t1); gp.diameter(g, diam); ORB_read(t2); print_time(timing_file, "Time(diameter)", t1, t2); outfile << "diameter " << diam << endl; } if(req_methods["effective_diameter"] == true){ cout << "Calculating effective diameter" << endl; ORB_read(t1); gp.effective_diameter(g, eff_diam); ORB_read(t2); print_time(timing_file, "Time(effective_diameter)", t1, t2); outfile << "effective_diameter " << eff_diam << endl; } outfile.close(); #ifdef HAS_SLEPC SlepcFinalize(); #elif HAVE_PETSC PetscFinalize(); #endif } 
// run_all_methods