Пример #1
0
    // Applies `mapfunction` to every local vertex of `g` that is owned by this
    // process and contained in `vset`, folds the mapped values together with
    // operator+=, and finally combines the per-process results through
    // g.dc().all_reduce().
    //
    //   g           - graph to traverse. A LOG_FATAL message is emitted when the
    //                 graph has not been finalized.
    //   mapfunction - callable invoked with a vertex_type; its result is stored
    //                 in / added to a ReductionType.
    //   vset        - restricts which vertices are visited; defaults to
    //                 GraphType::complete_set().
    //
    // Returns the `value` member of the reduction wrapper after the all_reduce.
    ReductionType map_reduce_vertices(GraphType& g,
                                      MapFunctionType mapfunction,
                                      const vertex_set& vset = GraphType::complete_set()) {
      BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>));
      BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>));
      using vertex_type = typename GraphType::vertex_type;

      if (!g.is_finalized()) {
        logstream(LOG_FATAL)
          << "\n\tAttempting to run graph.map_reduce_vertices(...) "
          << "\n\tbefore calling graph.finalize()."
          << std::endl;
      }
      g.dc().barrier();

      // Process-wide accumulator. The flag records whether any thread has
      // contributed yet, so the first contribution is assigned rather than
      // added onto a default-constructed value.
      bool global_result_set = false;
      ReductionType global_result = ReductionType();
#ifdef _OPENMP
#pragma omp parallel
#endif
      {
        // Per-thread partial reduction, tracked with the same "is set" scheme.
        bool have_partial = false;
        ReductionType partial = ReductionType();
#ifdef _OPENMP
        #pragma omp for
#endif
        for (int idx = 0; idx < static_cast<int>(g.num_local_vertices()); ++idx) {
          auto lvertex = g.l_vertex(idx);
          if (!lvertex.owned() || !vset.l_contains(lvid_type(idx))) {
            continue;
          }
          if (have_partial) {
            const vertex_type vtx(lvertex);
            const ReductionType mapped = mapfunction(vtx);
            partial += mapped;
          } else {
            // First matching vertex seen by this thread: seed the partial.
            vertex_type vtx(lvertex);
            partial = mapfunction(vtx);
            have_partial = true;
          }
        }
#ifdef _OPENMP
        #pragma omp critical
#endif
        {
          // Merge this thread's partial into the process-wide accumulator.
          if (have_partial) {
            if (global_result_set) {
              global_result += partial;
            } else {
              global_result = partial;
              global_result_set = true;
            }
          }
        }
      }

      // The wrapper carries the "is set" flag along with the value into the
      // cross-process reduction.
      conditional_addition_wrapper<ReductionType>
        wrapper(global_result, global_result_set);
      g.dc().all_reduce(wrapper);
      return wrapper.value;
    } // end of map_reduce_vertices
Пример #2
0
    // Deserializes this process's binary graph file into `g`.
    //
    // The file name is `prefix` + tostr(g.procid()) + ".bin". A full barrier is
    // executed before opening the file and again after a successful load.
    //
    // Returns true when the archive was read into `g`; returns false (after
    // logging an error) when the input stream is not good() after opening.
    // NOTE(review): the failure path returns without reaching the trailing
    // full_barrier() -- confirm that callers tolerate this.
    bool load_binary(GraphType& g, const std::string& prefix) {
      g.dc().full_barrier();
      const std::string fname = prefix + tostr(g.procid()) + ".bin";
      logstream(LOG_INFO) << "Load graph from " << fname << std::endl;

      general_ifstream fin(fname, true);
      if (fin.good()) {
        iarchive iarc(fin);
        iarc >> g;
        logstream(LOG_INFO) << "Finish loading graph from " << fname << std::endl;
        g.dc().full_barrier();
        return true;
      }

      logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
      return false;
    } // end of load
Пример #3
0
    // Clears `g`, loads every file matching `prefix` that is assigned to this
    // process, and finalizes the graph.
    //
    //   g           - graph to populate; it is cleared first.
    //   prefix      - either a directory (all files in it are candidates) or a
    //                 path whose final component is used as a file-name prefix
    //                 inside its parent directory ("." when there is no parent).
    //                 An empty prefix makes the call a no-op.
    //   line_parser - forwarded to load_from_stream() for each file.
    //
    // Files are distributed round-robin: file i is handled by the process whose
    // procid equals i % numprocs. Open or parse failures are reported through
    // log_and_throw_io_failure().
    void load(GraphType& g, std::string prefix,
              line_parser_type<GraphType> line_parser) {
      if (prefix.empty()) {
        return;
      }
      g.dc().full_barrier();
      g.clear();

      // Split the prefix into the directory to list and the name prefix to match.
      const boost::filesystem::path path(prefix);
      std::string directory_name;
      std::string search_prefix;
      if (boost::filesystem::is_directory(path)) {
        // A directory: search_prefix stays empty.
        directory_name = path.native();
      } else {
        search_prefix = path.filename().native();
        directory_name = path.parent_path().native();
        if (directory_name.empty()) {
          directory_name = ".";
        }
      }

      std::vector<std::string> graph_files;
      fs_util::list_files_with_prefix(directory_name, search_prefix, graph_files);
      if (graph_files.empty()) {
        logstream(LOG_WARNING) << "No files found matching " << prefix << std::endl;
      }

      parallel_for(0, graph_files.size(), [&](size_t i) {
        // Skip files that belong to another process.
        if (i % g.numprocs() != g.procid()) {
          return;
        }
        logstream(LOG_EMPH) << "Loading graph from file: " << graph_files[i] << std::endl;
        general_ifstream fin(graph_files[i]);
        if (!fin.good()) {
          log_and_throw_io_failure("Cannot open file: " + graph_files[i]);
        }
        const bool parsed = load_from_stream(g, graph_files[i], fin, line_parser);
        if (!parsed) {
          log_and_throw_io_failure("Fail parsing file: " + graph_files[i]);
        }
      });

      g.dc().full_barrier();
      g.finalize();
    } // end of load
Пример #4
0
    // Invokes the functors in `accfunction` on the in-edges of every local
    // vertex of `g`.
    //
    // Local vertices are dealt out in a strided fashion: functor k receives the
    // in-edges of local vertices k, k + N, k + 2N, ... where N is
    // accfunction.size(). The loop over functors is an OpenMP parallel-for when
    // _OPENMP is defined, and one functor's vertices are handled sequentially
    // within a single iteration of that loop.
    //
    // Requires a finalized graph and at least one functor (both asserted).
    // A barrier on g.dc() is executed before and after the traversal.
    void parallel_for_edges(GraphType& g,
                            std::vector<EdgeFunctorType>& accfunction) {
      using local_edge_type = typename GraphType::local_edge_type;
      using edge_type = typename GraphType::edge_type;
      ASSERT_TRUE(g.is_finalized());
      g.dc().barrier();
      int numaccfunctions = static_cast<int>(accfunction.size());
      ASSERT_GE(numaccfunctions, 1);
#ifdef _OPENMP
      #pragma omp parallel for
#endif
      for (int slot = 0; slot < static_cast<int>(accfunction.size()); ++slot) {
        // Stride through the local vertices assigned to this functor.
        int lvid = slot;
        while (lvid < static_cast<int>(g.num_local_vertices())) {
          for (const local_edge_type& local_edge : g.l_vertex(lvid).in_edges()) {
            accfunction[slot](edge_type(local_edge));
          }
          lvid += numaccfunctions;
        }
      }
      g.dc().barrier();
    }
Пример #5
0
    // Invokes the functors in `accfunction` on every local vertex of `g` that is
    // owned by this process.
    //
    // Local vertices are dealt out in a strided fashion: functor k is offered
    // local vertices k, k + N, k + 2N, ... where N is accfunction.size(), and
    // is called only for those that report owned(). The loop over functors is
    // an OpenMP parallel-for when _OPENMP is defined.
    //
    // Requires a finalized graph and at least one functor (both asserted).
    // A barrier on g.dc() is executed before and after the traversal.
    void parallel_for_vertices(GraphType& g,
                               std::vector<VertexFunctorType>& accfunction) {
      using vertex_type = typename GraphType::vertex_type;
      ASSERT_TRUE(g.is_finalized());
      g.dc().barrier();
      int numaccfunctions = static_cast<int>(accfunction.size());
      ASSERT_GE(numaccfunctions, 1);
#ifdef _OPENMP
      #pragma omp parallel for
#endif
      for (int slot = 0; slot < static_cast<int>(accfunction.size()); ++slot) {
        // Stride through the local vertices assigned to this functor.
        int lvid = slot;
        while (lvid < static_cast<int>(g.num_local_vertices())) {
          auto lvertex = g.l_vertex(lvid);
          if (lvertex.owned()) {
            accfunction[slot](vertex_type(lvertex));
          }
          lvid += numaccfunctions;
        }
      }
      g.dc().barrier();
    }
Пример #6
0
    // Serializes `g` into this process's binary graph file.
    //
    // The file name is `prefix` + tostr(g.procid()) + ".bin". The graph must be
    // finalized (asserted). A full barrier is executed on entry and again after
    // the archive has been written; the output stream is closed after that
    // second barrier.
    //
    // Returns true on success; returns false (after logging an error) when the
    // output stream is not good() after opening.
    // NOTE(review): the failure path returns without reaching the trailing
    // full_barrier() -- confirm that callers tolerate this.
    bool save_binary(const GraphType& g, const std::string& prefix) {
      g.dc().full_barrier();
      ASSERT_TRUE (g.is_finalized());

      timer savetime;
      savetime.start();

      const std::string fname = prefix + tostr(g.procid()) + ".bin";
      logstream(LOG_INFO) << "Save graph to " << fname << std::endl;

      general_ofstream fout(fname, true);
      if (fout.good()) {
        oarchive oarc(fout);
        oarc << g;
        logstream(LOG_INFO) << "Finish saving graph to " << fname << std::endl
                            << "Finished saving binary graph: "
                            << savetime.current_time() << std::endl;
        g.dc().full_barrier();
        fout.close();
        return true;
      }

      logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
      return false;
    } // end of save
Пример #7
0
 // Calls `transform_functor` on every local vertex of `g` that is owned by
 // this process and contained in `vset`, then synchronizes the graph.
 //
 //   g                 - graph whose vertices are transformed. A LOG_FATAL
 //                       message is emitted when it has not been finalized.
 //   transform_functor - callable invoked with a vertex_type lvalue.
 //   vset              - restricts which vertices are visited; defaults to
 //                       GraphType::complete_set().
 //
 // The traversal runs through parallel_for and is bracketed by barriers on
 // g.dc(); g.synchronize() is called after the closing barrier.
 void transform_vertices(GraphType& g,
                         TransformType transform_functor,
                         const vertex_set vset = GraphType::complete_set()) {
   using vertex_type = typename GraphType::vertex_type;
   if (!g.is_finalized()) {
     logstream(LOG_FATAL)
         << "\n\tAttempting to call graph.transform_vertices(...)"
         << "\n\tbefore finalizing the graph."
         << std::endl;
   }
   g.dc().barrier();

   size_t first_lvid = 0;
   size_t past_last_lvid = g.num_local_vertices();
   parallel_for (first_lvid, past_last_lvid, [&](size_t lvid) {
       auto lvertex = g.l_vertex(lvid);
       // Skip vertices this process does not own or that are outside vset.
       if (!lvertex.owned() || !vset.l_contains(lvid_type(lvid))) {
         return;
       }
       vertex_type vtx(lvertex);
       transform_functor(vtx);
     });

   g.dc().barrier();
   g.synchronize();
 }
Пример #8
0
    // Writes the vertices and/or edges of `g` as text into several output
    // files per process, optionally gzip-compressed.
    //
    //   g                 - graph to save; it is finalized first if necessary.
    //   prefix            - output file name prefix. File i of this process is
    //                       named "<prefix>_<k>_of_<total>" with
    //                       k = 1 + i + procid * files_per_machine and
    //                       total = numprocs * files_per_machine; ".gz" is
    //                       appended when `gzip` is true.
    //   writer            - provides save_vertex(v) / save_edge(e); whatever
    //                       they return is streamed into the output file.
    //   gzip              - push a gzip compressor onto each output stream.
    //   save_vertex       - when true, vertices are written via
    //                       parallel_for_vertices().
    //   save_edge         - when true, edges are written via
    //                       parallel_for_edges().
    //   files_per_machine - number of output files (and callbacks) per process.
    //
    // A full barrier on g.dc() is executed before the files are opened and
    // after they have been closed.
    void save(GraphType& g,
              const std::string& prefix,
              Writer writer,
              bool gzip = true,
              bool save_vertex = true,
              bool save_edge = true,
              size_t files_per_machine = 4) {
      typedef typename GraphType::vertex_type vertex_type;
      typedef typename GraphType::edge_type edge_type;
      typedef std::function<void(vertex_type)> vertex_function_type;
      typedef std::function<void(edge_type)> edge_function_type;
      typedef boost::iostreams::filtering_stream<boost::iostreams::output>
          boost_fstream_type;

      if(!g.is_finalized()) {
        g.finalize();
      }

      g.dc().full_barrier();
      // figure out the filenames
      std::vector<std::string> graph_files;
      std::vector<union_fstream*> outstreams;
      std::vector<boost_fstream_type*> booststreams;

      graph_files.resize(files_per_machine);
      for(size_t i = 0; i < files_per_machine; ++i) {
        graph_files[i] = prefix + "_" + tostr(1 + i + g.procid() * files_per_machine)
            + "_of_" + tostr(g.numprocs() * files_per_machine);
        if (gzip) graph_files[i] += ".gz";
      }

      // create the vector of callbacks
      std::vector<vertex_function_type> vertex_callbacks(graph_files.size());
      std::vector<edge_function_type> edge_callbacks(graph_files.size());

      for(size_t i = 0; i < graph_files.size(); ++i) {
        logstream(LOG_INFO) << "Saving to file: " << graph_files[i] << std::endl;
        union_fstream* out_file =
            new union_fstream(graph_files[i], std::ios_base::out | std::ios_base::binary);
        // attach gzip if the file is gzip
        boost_fstream_type* fout = new boost_fstream_type;
        // Using gzip filter
        if (gzip) fout->push(boost::iostreams::gzip_compressor());
        fout->push(*(out_file->get_ostream()));
        outstreams.push_back(out_file);
        booststreams.push_back(fout);

        // The callbacks outlive this loop iteration, so the stream pointer must
        // be captured by value: `fout` is a loop-local variable and a
        // by-reference capture would dangle by the time the callbacks run.
        // `writer` lives for the whole function and is captured by reference.
        vertex_callbacks[i] = [fout, &writer](vertex_type v) {
          *fout << writer.save_vertex(v);
        };
        edge_callbacks[i] = [fout, &writer](edge_type e) {
          *fout << writer.save_edge(e);
        };
      }

      if (save_vertex) parallel_for_vertices(g, vertex_callbacks);
      if (save_edge) parallel_for_edges(g, edge_callbacks);

      // cleanup: pop the filters pushed above (the sink, then the gzip
      // compressor if present) before closing and freeing the underlying file.
      for(size_t i = 0; i < graph_files.size(); ++i) {
        booststreams[i]->pop();
        if (gzip) booststreams[i]->pop();
        outstreams[i]->close();
        delete booststreams[i];
        delete outstreams[i];
      }
      vertex_callbacks.clear();
      edge_callbacks.clear();
      outstreams.clear();
      booststreams.clear();
      g.dc().full_barrier();
    } // end of save to posixfs