ReductionType map_reduce_vertices(GraphType& g, MapFunctionType mapfunction, const vertex_set& vset = GraphType::complete_set()) { BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>)); BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>)); typedef typename GraphType::vertex_type vertex_type; if(!g.is_finalized()) { logstream(LOG_FATAL) << "\n\tAttempting to run graph.map_reduce_vertices(...) " << "\n\tbefore calling graph.finalize()." << std::endl; } g.dc().barrier(); bool global_result_set = false; ReductionType global_result = ReductionType(); #ifdef _OPENMP #pragma omp parallel #endif { bool result_set = false; ReductionType result = ReductionType(); #ifdef _OPENMP #pragma omp for #endif for (int i = 0; i < (int)g.num_local_vertices(); ++i) { auto lvertex = g.l_vertex(i); if (lvertex.owned() && vset.l_contains(lvid_type(i))) { if (!result_set) { vertex_type vtx(lvertex); result = mapfunction(vtx); result_set = true; } else if (result_set){ const vertex_type vtx(lvertex); const ReductionType tmp = mapfunction(vtx); result += tmp; } } } #ifdef _OPENMP #pragma omp critical #endif { if (result_set) { if (!global_result_set) { global_result = result; global_result_set = true; } else { global_result += result; } } } } conditional_addition_wrapper<ReductionType> wrapper(global_result, global_result_set); g.dc().all_reduce(wrapper); return wrapper.value; } // end of map_reduce_vertices
/**
 * Deserializes the graph from the per-process binary file
 * `prefix + tostr(g.procid()) + ".bin"`.
 *
 * \param g       Graph that receives the deserialized contents.
 * \param prefix  Path prefix shared by all per-process files.
 * \return true on success, false when the file could not be opened.
 *
 * NOTE(review): the failure path returns without reaching the trailing
 * full_barrier(); confirm that callers handle a process that fails alone.
 */
bool load_binary(GraphType& g, const std::string& prefix) {
  g.dc().full_barrier();

  std::string fname = prefix + tostr(g.procid()) + ".bin";
  logstream(LOG_INFO) << "Load graph from " << fname << std::endl;

  // The second constructor argument is passed as `true`, matching save_binary.
  general_ifstream fin(fname, true);
  if (fin.good()) {
    iarchive iarc(fin);
    iarc >> g;
    logstream(LOG_INFO) << "Finish loading graph from " << fname << std::endl;
    g.dc().full_barrier();
    return true;
  }

  logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
  return false;
} // end of load
void load(GraphType& g, std::string prefix, line_parser_type<GraphType> line_parser) { if (prefix.length() == 0) return; g.dc().full_barrier(); g.clear(); std::string directory_name; std::string original_path(prefix); boost::filesystem::path path(prefix); std::string search_prefix; if (boost::filesystem::is_directory(path)) { // if this is a directory // force a "/" at the end of the path // make sure to check that the path is non-empty. (you do not // want to make the empty path "" the root path "/" ) directory_name = path.native(); } else { directory_name = path.parent_path().native(); search_prefix = path.filename().native(); directory_name = (directory_name.empty() ? "." : directory_name); } std::vector<std::string> graph_files; fs_util::list_files_with_prefix(directory_name, search_prefix, graph_files); if (graph_files.size() == 0) { logstream(LOG_WARNING) << "No files found matching " << original_path << std::endl; } parallel_for(0, graph_files.size(), [&](size_t i) { if (i % g.numprocs() == g.procid()) { logstream(LOG_EMPH) << "Loading graph from file: " << graph_files[i] << std::endl; general_ifstream fin(graph_files[i]); if(!fin.good()) { log_and_throw_io_failure("Cannot open file: " + graph_files[i]); } const bool success = load_from_stream(g, graph_files[i], fin, line_parser); if(!success) { log_and_throw_io_failure("Fail parsing file: " + graph_files[i]); } } }); g.dc().full_barrier(); g.finalize(); } // end of load
/**
 * Invokes the supplied functors on the in-edges of every local vertex.
 *
 * Functor `k` handles local vertices k, k + n, k + 2n, ... where n is the
 * number of functors, so each functor is driven by exactly one iteration of
 * the (optionally OpenMP-parallel) outer loop.
 *
 * \param g            Finalized graph (asserted).
 * \param accfunction  Non-empty list of functors (asserted), each called with
 *                     an `edge_type` built from a local edge.
 */
void parallel_for_edges(GraphType& g,
                        std::vector<EdgeFunctorType>& accfunction) {
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::edge_type edge_type;

  ASSERT_TRUE(g.is_finalized());
  g.dc().barrier();

  const int numaccfunctions = (int)accfunction.size();
  ASSERT_GE(numaccfunctions, 1);
  const int num_local = (int)g.num_local_vertices();

#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int slot = 0; slot < numaccfunctions; ++slot) {
    EdgeFunctorType& functor = accfunction[slot];
    // Strided walk: this functor owns every numaccfunctions-th local vertex.
    for (int lvid = slot; lvid < num_local; lvid += numaccfunctions) {
      for (const local_edge_type& e : g.l_vertex(lvid).in_edges()) {
        functor(edge_type(e));
      }
    }
  }

  g.dc().barrier();
}
/**
 * Invokes the supplied functors on every locally owned vertex.
 *
 * Functor `k` handles local vertices k, k + n, k + 2n, ... where n is the
 * number of functors, so each functor is driven by exactly one iteration of
 * the (optionally OpenMP-parallel) outer loop.
 *
 * \param g            Finalized graph (asserted).
 * \param accfunction  Non-empty list of functors (asserted), each called with
 *                     a `vertex_type` built from an owned local vertex.
 */
void parallel_for_vertices(GraphType& g,
                           std::vector<VertexFunctorType>& accfunction) {
  typedef typename GraphType::vertex_type vertex_type;

  ASSERT_TRUE(g.is_finalized());
  g.dc().barrier();

  const int numaccfunctions = (int)accfunction.size();
  ASSERT_GE(numaccfunctions, 1);
  const int num_local = (int)g.num_local_vertices();

#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int slot = 0; slot < numaccfunctions; ++slot) {
    VertexFunctorType& functor = accfunction[slot];
    // Strided walk: this functor owns every numaccfunctions-th local vertex.
    for (int lvid = slot; lvid < num_local; lvid += numaccfunctions) {
      auto lvertex = g.l_vertex(lvid);
      if (!lvertex.owned()) continue;  // vertices not owned here are skipped
      functor(vertex_type(lvertex));
    }
  }

  g.dc().barrier();
}
bool save_binary(const GraphType& g, const std::string& prefix) { g.dc().full_barrier(); ASSERT_TRUE (g.is_finalized()); timer savetime; savetime.start(); std::string fname = prefix + tostr(g.procid()) + ".bin"; logstream(LOG_INFO) << "Save graph to " << fname << std::endl; general_ofstream fout(fname, true); if (!fout.good()) { logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl; return false; } oarchive oarc(fout); oarc << g; logstream(LOG_INFO) << "Finish saving graph to " << fname << std::endl << "Finished saving binary graph: " << savetime.current_time() << std::endl; g.dc().full_barrier(); fout.close(); return true; } // end of save
/**
 * Applies `transform_functor` to every locally owned vertex that is a member
 * of `vset`, then calls `g.synchronize()`.
 *
 * \param g                  Graph to operate on; a fatal message is logged
 *                           when it has not been finalized.
 * \param transform_functor  Callable invoked with a `vertex_type` lvalue.
 * \param vset               Restricts the operation to vertices in this set.
 *                           Taken by const reference (as in
 *                           map_reduce_vertices) so the set is not copied on
 *                           every call.
 */
void transform_vertices(GraphType& g,
                        TransformType transform_functor,
                        const vertex_set& vset = GraphType::complete_set()) {
  typedef typename GraphType::vertex_type vertex_type;

  if (!g.is_finalized()) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to call graph.transform_vertices(...)"
      << "\n\tbefore finalizing the graph."
      << std::endl;
  }
  g.dc().barrier();

  const size_t ibegin = 0;
  const size_t iend = g.num_local_vertices();
  parallel_for(ibegin, iend, [&](size_t i) {
    auto lvertex = g.l_vertex(i);
    // Only vertices owned by this process and selected by vset are touched.
    if (lvertex.owned() && vset.l_contains(lvid_type(i))) {
      vertex_type vtx(lvertex);
      transform_functor(vtx);
    }
  });

  g.dc().barrier();
  // NOTE(review): presumably propagates the modified vertex data to other
  // processes -- confirm against GraphType::synchronize.
  g.synchronize();
}
void save(GraphType& g, const std::string& prefix, Writer writer, bool gzip = true, bool save_vertex = true, bool save_edge = true, size_t files_per_machine = 4) { typedef typename GraphType::vertex_type vertex_type; typedef typename GraphType::edge_type edge_type; typedef std::function<void(vertex_type)> vertex_function_type; typedef std::function<void(edge_type)> edge_function_type; typedef boost::iostreams::filtering_stream<boost::iostreams::output> boost_fstream_type; if(!g.is_finalized()) { g.finalize(); } g.dc().full_barrier(); // figure out the filenames std::vector<std::string> graph_files; std::vector<union_fstream*> outstreams; std::vector<boost_fstream_type*> booststreams; graph_files.resize(files_per_machine); for(size_t i = 0; i < files_per_machine; ++i) { graph_files[i] = prefix + "_" + tostr(1 + i + g.procid() * files_per_machine) + "_of_" + tostr(g.numprocs() * files_per_machine); if (gzip) graph_files[i] += ".gz"; } // create the vector of callbacks std::vector<vertex_function_type> vertex_callbacks(graph_files.size()); std::vector<edge_function_type> edge_callbacks(graph_files.size()); for(size_t i = 0; i < graph_files.size(); ++i) { logstream(LOG_INFO) << "Saving to file: " << graph_files[i] << std::endl; union_fstream* out_file = new union_fstream(graph_files[i], std::ios_base::out | std::ios_base::binary); // attach gzip if the file is gzip boost_fstream_type* fout = new boost_fstream_type; // Using gzip filter if (gzip) fout->push(boost::iostreams::gzip_compressor()); fout->push(*(out_file->get_ostream())); outstreams.push_back(out_file); booststreams.push_back(fout); vertex_callbacks[i] = [&](vertex_type v) {*fout << writer.save_vertex(v);}; edge_callbacks[i] = [&](edge_type e) {*fout << writer.save_edge(e);}; } if (save_vertex) parallel_for_vertices(g, vertex_callbacks); if (save_edge) parallel_for_edges(g, edge_callbacks); // cleanup for(size_t i = 0; i < graph_files.size(); ++i) { booststreams[i]->pop(); if (gzip) booststreams[i]->pop(); 
outstreams[i]->close(); delete booststreams[i]; delete outstreams[i]; } vertex_callbacks.clear(); edge_callbacks.clear(); outstreams.clear(); booststreams.clear(); g.dc().full_barrier(); } // end of save to posixfs