Exemplo n.º 1
0
/**
 * Entry point: loads a graph from a hard-coded HDFS path, runs the `pagerank`
 * vertex program on the synchronous engine, prints the summed rank and saves
 * the per-vertex results back to HDFS.
 *
 * @param argc  argument count, forwarded to graphlab::mpi_tools::init
 * @param argv  argument vector, forwarded to graphlab::mpi_tools::init
 * @return 0 (implicit) once the run has completed
 */
int main(int argc, char** argv) {
    graphlab::mpi_tools::init(argc, argv);

    // String literals are immutable; binding them to a non-const char* is
    // ill-formed since C++11, so the paths are held through const pointers.
    const char* const input_file = "hdfs://master:9000/pullgel/twitter";
    const char* const output_file = "hdfs://master:9000/exp/twitter";
    // ROUND is a global defined outside this function
    // (presumably the number of PageRank rounds -- confirm against the vertex program).
    ROUND = 10;
    graphlab::distributed_control dc;
    global_logger().set_log_level(LOG_INFO);

    // Time the load + finalize phase.
    graphlab::timer t;
    t.start();
    graph_type graph(dc);
    graph.load(input_file, line_parser);
    graph.finalize();

    dc.cout() << "Loading graph in " << t.current_time() << " seconds" << std::endl;
    std::string exec_type = "synchronous";

    graphlab::omni_engine<pagerank> engine(dc, graph, exec_type);

    // Schedule every vertex, then run the engine.
    engine.signal_all();
    engine.start();

    dc.cout() << "Finished Running engine in " << engine.elapsed_seconds()
        << " seconds." << std::endl;

    // Note: written with std::cout rather than dc.cout(), so this line is
    // emitted by every process that executes main.
    const double total_rank = graph.map_reduce_vertices<double>(map_rank);
    std::cout << "Total rank: " << total_rank << std::endl;


    // Restart the timer so that it measures only the save phase.
    t.start();

    graph.save(output_file, pagerank_writer(), false, // set to true if each output file is to be gzipped
            true, // whether vertices are saved
            false); // whether edges are saved
    dc.cout() << "Dumping graph in " << t.current_time() << " seconds" << std::endl;

    graphlab::mpi_tools::finalize();
}
Exemplo n.º 2
0
/**
 * Entry point: PageRank driver that additionally appends one line of timing
 * and partitioning statistics to a CSV file and prints the process topology
 * (both only on process 0).
 *
 * MPI initialisation/finalisation is commented out in this variant.
 *
 * @param argc  argument count, parsed by graphlab::command_line_options
 * @param argv  argument vector, parsed by graphlab::command_line_options
 * @return EXIT_FAILURE if the command line cannot be parsed or (on process 0)
 *         the CSV file cannot be opened; 0 if neither --graph nor --powerlaw
 *         was given; EXIT_SUCCESS otherwise.
 */
int main(int argc, char** argv) {
  // Initialize control plane using mpi (disabled in this variant)
//  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  global_logger().set_log_level(LOG_INFO);

  // Parse command line options -----------------------------------------------
  graphlab::command_line_options clopts("PageRank algorithm.");
  std::string graph_dir;
  std::string format = "adj";
  std::string exec_type = "synchronous";
  clopts.attach_option("graph", graph_dir,
                       "The graph file.  If none is provided "
                       "then a toy graph will be created");
  clopts.add_positional("graph");
  clopts.attach_option("engine", exec_type,
                       "The engine type synchronous or asynchronous");
  clopts.attach_option("tol", TOLERANCE,
                       "The permissible change at convergence.");
  clopts.attach_option("format", format,
                       "The graph file format");
  size_t powerlaw = 0;
  clopts.attach_option("powerlaw", powerlaw,
                       "Generate a synthetic powerlaw out-degree graph. ");
  // Adjacent literals are concatenated verbatim, so each piece needs its own
  // trailing space (the first one used to run into "overriding").
  clopts.attach_option("iterations", ITERATIONS,
                       "If set, will force the use of the synchronous engine "
                       "overriding any engine option set by the --engine parameter. "
                       "Runs complete (non-dynamic) PageRank for a fixed "
                       "number of iterations. Also overrides the iterations "
                       "option in the engine");
  clopts.attach_option("use_delta", USE_DELTA_CACHE,
                       "Use the delta cache to reduce time in gather.");
  std::string saveprefix;
  clopts.attach_option("saveprefix", saveprefix,
                       "If set, will save the resultant pagerank to a "
                       "sequence of files with prefix saveprefix");

  if(!clopts.parse(argc, argv)) {
    dc.cout() << "Error in parsing command line arguments." << std::endl;
    return EXIT_FAILURE;
  }


  // Enable gather caching in the engine
  clopts.get_engine_args().set_option("use_cache", USE_DELTA_CACHE);

  if (ITERATIONS) {
    // make sure this is the synchronous engine
    dc.cout() << "--iterations set. Forcing Synchronous engine, and running "
              << "for " << ITERATIONS << " iterations." << std::endl;
    clopts.get_engine_args().set_option("type", "synchronous");
    clopts.get_engine_args().set_option("max_iterations", ITERATIONS);
    clopts.get_engine_args().set_option("sched_allv", true);
  }

  // Used for the load, finalize and compute phases in turn.
  // NOTE(review): load_time below assumes graphlab::timer starts counting on
  // construction -- confirm against the timer implementation.
  graphlab::timer timer;

  // Build the graph ----------------------------------------------------------
  graph_type graph(dc, clopts);
  if(powerlaw > 0) { // make a synthetic graph
    dc.cout() << "Loading synthetic Powerlaw graph." << std::endl;
    graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000);
  }
  else if (graph_dir.length() > 0) { // Load the graph from a file
    dc.cout() << "Loading graph in format: "<< format << std::endl;
    graph.load_format(graph_dir, format);
  }
  else {
    dc.cout() << "graph or powerlaw option must be specified" << std::endl;
    clopts.print_description();
    return 0;
  }

  const double load_time = timer.current_time();
  timer.start();  // restart: measure finalize() on its own

  // must call finalize before querying the graph
  graph.finalize();

  const double finalize_time = timer.current_time();
  const double ingress_time = load_time + finalize_time;
  dc.cout() << "#vertices: " << graph.num_vertices()
            << " #edges:" << graph.num_edges() << std::endl;

  // Initialize the vertex data
  graph.transform_vertices(init_vertex);

  // Running The Engine -------------------------------------------------------
  graphlab::omni_engine<pagerank> engine(dc, graph, exec_type, clopts);
  engine.signal_all();
  timer.start();  // restart: measure engine.start() on its own
  engine.start();
  const double runtime = timer.current_time();
  dc.cout() << "Finished Running engine in " << runtime
            << " seconds." << std::endl;


  const double total_rank = graph.map_reduce_vertices<double>(map_rank);
  std::cout << "Total rank: " << total_rank << std::endl;

  // Replicas per vertex.
  const double replication_factor =
      static_cast<double>(graph.num_replicas()) / graph.num_vertices();

  // Save the final graph -----------------------------------------------------
  if (saveprefix != "") {
    graph.save(saveprefix, pagerank_writer(),
               false,    // do not gzip
               true,     // save vertices
               false);   // do not save edges
  }

  const double totalpr = graph.map_reduce_vertices<double>(pagerank_sum);
  std::cout << "Totalpr = " << totalpr << "\n";

  // Only process 0 appends the statistics row and prints the topology.
  if (dc.procid() == 0) {
    const std::string output_filename = "/projects/sciteam/jsb/shin1/output.csv";
    std::ofstream ofs;
    // Append mode: every run adds one row to the same file.
    ofs.open(output_filename.c_str(), std::ios::out | std::ios::app);
    if (!ofs.is_open()) {
      std::cout << "Failed to open output file.\n";
      return EXIT_FAILURE;
    }
    std::string ingress_method = "";
    clopts.get_graph_args().get_option("ingress", ingress_method);

    const bool topology_aware = dc.topology_aware();

    const double num_master2mirror_hops =
        static_cast<double>(graph.num_master2mirror_hops()) / graph.num_vertices();

    const double average_local_own_nverts = graph.average_num_local_own_vertices();

    const double variance_local_own_nverts = graph.variance_num_local_own_vertices();

    // algorithm,partitioning_strategy,num_iterations,replication_factor,load_time,finalize_time,ingress_time,computation_time,total_time,topology_aware,master2mirror_hops,average_local_own_nverts,variance_local_own_nverts
    ofs << "pagerank," << ingress_method << "," << ITERATIONS << ","
        << replication_factor << "," << load_time << "," << finalize_time << ","
        << ingress_time << "," << runtime << "," << (ingress_time + runtime) << ","
        << topology_aware << "," << num_master2mirror_hops << ","
        << average_local_own_nverts << "," << variance_local_own_nverts << std::endl;

    ofs.close();

    std::cout << "Topologies:\n";
    for (size_t i = 0; i < dc.topologies().size(); ++i) {
      std::cout << "procid: " << i << ": ";
      // Deliberately copied: the return type of dc.topologies() is not visible
      // here, so a reference into its result might dangle.
      std::vector<int> coord = dc.topologies()[i];
      for (size_t j = 0; j < coord.size(); ++j) {
        std::cout << coord[j] << " ";
      }
      std::cout << std::endl;
    }
  }

  // Tear-down communication layer and quit -----------------------------------
//  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // End of main
Exemplo n.º 3
0
/**
 * Entry point: minimal PageRank driver. Loads the graph named on the command
 * line, runs the `pagerank` vertex program and optionally saves the result.
 *
 * @param argc  argument count, forwarded to MPI init and the option parser
 * @param argv  argument vector, forwarded to MPI init and the option parser
 * @return EXIT_FAILURE if the command line cannot be parsed or no graph was
 *         given, EXIT_SUCCESS otherwise. MPI is finalized on every path.
 */
int main(int argc, char** argv) {
  // Initialize control plane using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  global_logger().set_log_level(LOG_INFO);

  // Parse command line options -----------------------------------------------
  graphlab::command_line_options clopts("PageRank algorithm.");
  std::string graph_dir;
  std::string format = "adj";
  std::string exec_type = "synchronous";
  clopts.attach_option("graph", graph_dir,
                       "The graph file. Required ");
  clopts.add_positional("graph");
  clopts.attach_option("format", format,
                       "The graph file format");
  clopts.attach_option("engine", exec_type,
                       "The engine type synchronous or asynchronous");
  clopts.attach_option("tol", TOLERANCE,
                       "The permissible change at convergence.");
  std::string saveprefix;
  clopts.attach_option("saveprefix", saveprefix,
                       "If set, will save the resultant pagerank to a "
                       "sequence of files with prefix saveprefix");

  if(!clopts.parse(argc, argv)) {
    dc.cout() << "Error in parsing command line arguments." << std::endl;
    // MPI was initialised above, so it must be torn down on error exits too.
    graphlab::mpi_tools::finalize();
    return EXIT_FAILURE;
  }

  if (graph_dir == "") {
    // std::endl terminates the line and flushes it before the process exits.
    dc.cout() << "Graph not specified. Cannot continue" << std::endl;
    graphlab::mpi_tools::finalize();
    return EXIT_FAILURE;
  }

  // Build the graph ----------------------------------------------------------
  graph_type graph(dc, clopts);
  dc.cout() << "Loading graph in format: "<< format << std::endl;
  graph.load_format(graph_dir, format);
  // must call finalize before querying the graph
  graph.finalize();
  dc.cout() << "#vertices: " << graph.num_vertices()
            << " #edges:" << graph.num_edges() << std::endl;

  // Initialize the vertex data
  graph.transform_vertices(init_vertex);

  // Running The Engine -------------------------------------------------------
  graphlab::omni_engine<pagerank> engine(dc, graph, exec_type, clopts);
  engine.signal_all();
  engine.start();
  const float runtime = engine.elapsed_seconds();
  dc.cout() << "Finished Running engine in " << runtime
            << " seconds." << std::endl;

  // Save the final graph -----------------------------------------------------
  if (saveprefix != "") {
    graph.save(saveprefix, pagerank_writer(),
               false,    // do not gzip
               true,     // save vertices
               false);   // do not save edges
  }

  // Tear-down communication layer and quit -----------------------------------
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // End of main
Exemplo n.º 4
0
/**
 * Entry point: PageRank driver that also reports the wall-clock time of the
 * whole run (from the first statement of main until after MPI finalization).
 *
 * The graph is either loaded from --graph or generated with --powerlaw.
 *
 * @param argc  argument count, forwarded to MPI init and the option parser
 * @param argv  argument vector, forwarded to MPI init and the option parser
 * @return EXIT_FAILURE if the command line cannot be parsed; 0 if neither
 *         --graph nor --powerlaw was given; EXIT_SUCCESS otherwise. MPI is
 *         finalized on every path.
 */
int main(int argc, char** argv) {
  // Started first so that the total includes MPI and option set-up.
  graphlab::timer total_timer; total_timer.start();

  // Initialize control plane using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  global_logger().set_log_level(LOG_INFO);

  // Parse command line options -----------------------------------------------
  graphlab::command_line_options clopts("PageRank algorithm.");
  std::string graph_dir;
  std::string format = "adj";
  std::string exec_type = "synchronous";
  clopts.attach_option("graph", graph_dir,
                       "The graph file.  If none is provided "
                       "then a toy graph will be created");
  clopts.add_positional("graph");
  clopts.attach_option("engine", exec_type,
                       "The engine type synchronous or asynchronous");
  clopts.attach_option("tol", TOLERANCE,
                       "The permissible change at convergence.");
  clopts.attach_option("format", format,
                       "The graph file format");
  size_t powerlaw = 0;
  clopts.attach_option("powerlaw", powerlaw,
                       "Generate a synthetic powerlaw out-degree graph. ");
  // Adjacent literals are concatenated verbatim, so each piece needs its own
  // trailing space (the first one used to run into "overriding").
  clopts.attach_option("iterations", ITERATIONS,
                       "If set, will force the use of the synchronous engine "
                       "overriding any engine option set by the --engine parameter. "
                       "Runs complete (non-dynamic) PageRank for a fixed "
                       "number of iterations. Also overrides the iterations "
                       "option in the engine");
  clopts.attach_option("use_delta", USE_DELTA_CACHE,
                       "Use the delta cache to reduce time in gather.");
  std::string saveprefix;
  clopts.attach_option("saveprefix", saveprefix,
                       "If set, will save the resultant pagerank to a "
                       "sequence of files with prefix saveprefix");

  if(!clopts.parse(argc, argv)) {
    dc.cout() << "Error in parsing command line arguments." << std::endl;
    // MPI was initialised above, so it must be torn down on error exits too.
    graphlab::mpi_tools::finalize();
    return EXIT_FAILURE;
  }


  // Enable gather caching in the engine
  clopts.get_engine_args().set_option("use_cache", USE_DELTA_CACHE);

  if (ITERATIONS) {
    // make sure this is the synchronous engine
    dc.cout() << "--iterations set. Forcing Synchronous engine, and running "
              << "for " << ITERATIONS << " iterations." << std::endl;
    clopts.get_engine_args().set_option("type", "synchronous");
    clopts.get_engine_args().set_option("max_iterations", ITERATIONS);
    clopts.get_engine_args().set_option("sched_allv", true);
  }

  // Build the graph ----------------------------------------------------------
  graph_type graph(dc, clopts);
  if(powerlaw > 0) { // make a synthetic graph
    dc.cout() << "Loading synthetic Powerlaw graph." << std::endl;
    graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000);
  }
  else if (graph_dir.length() > 0) { // Load the graph from a file
    dc.cout() << "Loading graph in format: "<< format << std::endl;
    graph.load_format(graph_dir, format);
  }
  else {
    dc.cout() << "graph or powerlaw option must be specified" << std::endl;
    clopts.print_description();
    graphlab::mpi_tools::finalize();
    return 0;
  }
  // must call finalize before querying the graph
  graph.finalize();
  dc.cout() << "#vertices: " << graph.num_vertices()
            << " #edges:" << graph.num_edges() << std::endl;

  // Initialize the vertex data
  graph.transform_vertices(init_vertex);

  // Running The Engine -------------------------------------------------------
  graphlab::omni_engine<pagerank> engine(dc, graph, exec_type, clopts);
  engine.signal_all();
  engine.start();
  const double runtime = engine.elapsed_seconds();
  dc.cout() << "Finished Running engine in " << runtime
            << " seconds." << std::endl;


  const double total_rank = graph.map_reduce_vertices<double>(map_rank);
  std::cout << "Total rank: " << total_rank << std::endl;

  // Save the final graph -----------------------------------------------------
  if (saveprefix != "") {
    graph.save(saveprefix, pagerank_writer(),
               false,    // do not gzip
               true,     // save vertices
               false);   // do not save edges
  }

  // this interferes with TOTAL TIME print out
  //double totalpr = graph.map_reduce_vertices<double>(pagerank_sum);
  //std::cout << "Totalpr = " << totalpr << "\n";

  // Tear-down communication layer and quit -----------------------------------
  graphlab::mpi_tools::finalize();
  // Reported after finalize so that the tear-down is part of the total.
  dc.cout() << "TOTAL TIME (sec): " << total_timer.current_time() << std::endl;
  return EXIT_SUCCESS;
} // End of main
Exemplo n.º 5
0
// Reads environment variable `name` as an integer via atoi. Returns 0 (and
// warns on stderr) when the variable is not set, instead of passing a null
// pointer to atoi.
static int zorro_env_as_int(const char* name) {
    const char* raw = getenv(name);
    if (raw == nullptr) {
        std::cerr << "Zorro: environment variable " << name
                  << " is not set; assuming 0." << std::endl;
        return 0;
    }
    return atoi(raw);
}

/**
 * Runs PageRank on the synchronous engine, restarting the whole pipeline
 * every time a graphlab::failure_exception is raised.
 *
 * Each attempt: initialise distributed control from ZooKeeper, build the
 * graph and engine, wait for all servers to join, (on retries, for
 * non-replacement servers) broadcast surviving vertex state, load the graph,
 * merge any received state and run the engine. On success the results are
 * written and the function returns; on failure the local graph state and the
 * IP-to-id map are stashed and the loop starts another attempt.
 *
 * Configuration is read from the environment variables REPLACEMENT,
 * REPLACEMENT_COUNT, GROUND_TRUTH and PROACTIVE_REPLICATION (integers;
 * an unset variable is treated as 0).
 *
 * @param clopts      parsed command line options, passed to graph and engine
 * @param graph_dir   path handed to graph.load_format
 * @param format      graph file format handed to graph.load_format
 * @param exec_type   engine type handed to omni_engine
 * @param saveprefix  if non-empty, prefix under which the final graph is saved
 */
void start(graphlab::command_line_options& clopts,
           std::string & graph_dir,
           std::string & format,
           std::string & exec_type,
           std::string & saveprefix) {
    // State that outlives a single attempt: filled in when an attempt fails
    // and handed to the engine of the next attempt as backup.
    local_graph_type l_graph;
    std::vector<vertex_record> lvid2record_ref;
    std::vector<replica_record> replicas_lvid2record_ref;
    //typedef graphlab::hopscotch_map<graphlab::vertex_id_type, graphlab::lvid_type> hopscotch_map_type;
    //hopscotch_map_type vid2lvid_ref;
    //hopscotch_map_type replicas_vid2lvid_ref;

    unsigned int iter = 0;            // number of failed attempts so far
    int iteration_number = 0;         // engine iteration reached by the last failed attempt
    int surviving_server_count = 0;   // only meaningful once iter > 0
    const bool replacement_server = zorro_env_as_int("REPLACEMENT") != 0;
    const int replacement_server_count = zorro_env_as_int("REPLACEMENT_COUNT");
    const bool ground_truth = zorro_env_as_int("GROUND_TRUTH") != 0;
    const bool proactive_replication = zorro_env_as_int("PROACTIVE_REPLICATION") != 0;
    // Created once per attempt; the instance of a failed attempt is kept
    // until the next attempt has re-initialised distributed control.
    graphlab::reactive_zorro<pagerank>* rs = nullptr;

    std::unordered_map<std::string, int> current_ip_to_id_map; // map from ip of process to its id for currently alive processes
    std::unordered_map<std::string, int> previous_ip_to_id_map; // map from ip of process to its id for previously alive processes
    std::vector<int> id_to_previous_id; // indexed by a process's previous id: its current id, or -1 if its ip is not among the current machines

    while (true) {


        try {
            if (iter > 0) {
                // TODO: Sleep here if required to ensure that the count of surviving servers is accurate.
                // Consider all survivors and any replacement servers (if available) for the next execution.
                surviving_server_count = rs->stop_watching_zoookeeper(); // we can increase the surviving server count if replacements available
                std::cout << "Zorro: Surviving server count: " << surviving_server_count << std::endl;
                std::cout << "Zorro: Replacement server count: " << replacement_server_count << std::endl;
                setenv("ZK_NUMNODES", std::to_string(surviving_server_count + replacement_server_count).c_str(), 1);
            }


            // Initialize from ZooKeeper
            graphlab::dc_init_param initparam;
            graphlab::init_param_from_zookeeper(initparam);
            graphlab::distributed_control dc(initparam);

            // Initialize without ZooKeeper
            //graphlab::distributed_control dc;

            if (iter > 0) {
                rs->stop(); // end previous reactive_server session
                delete rs;
                rs = nullptr;
            }

            // Initialize the graph
            graph_type graph(dc, clopts);
            std::cout << "Zorro: Graph initialized." << std::endl;

            // Initialize synchronous engine
            graphlab::omni_engine<pagerank> engine(dc, graph, exec_type, clopts);
            // NOTE(review): the unchecked downcast assumes exec_type selects
            // the synchronous engine -- confirm at the call site.
            auto sync_engine_ptr = static_cast<graphlab::synchronous_engine<pagerank> * >(engine.get_engine_ptr());

            // Set local_graph and lvid2record map in synchronous engine.
            // These are required to transfer surviving vertex values to replacement servers.
            if (!replacement_server) {
                sync_engine_ptr->set_l_graph_backup(&l_graph);
                sync_engine_ptr->set_lvid2record_backup(&lvid2record_ref);
                if (proactive_replication) {
                    sync_engine_ptr->set_replicas_lvid2record_backup(&replicas_lvid2record_ref);
                }
            }

            std::cout << "Zorro: Constructing id maps" << std::endl;
            // Waitless Zorro maps previous process ids to new process ids
            const std::vector<std::string>& machines = initparam.machines;
            for (size_t i = 0; i < machines.size(); ++i) {
                std::cout << machines[i] << std::endl;
                // Key on the part before the first ':' of each machine entry.
                const size_t pos = machines[i].find(":");
                current_ip_to_id_map[machines[i].substr(0, pos)] = static_cast<int>(i);
            }


            if (iter == 0) {
                id_to_previous_id.resize(dc.numprocs());
                // TODO: Figure out the most efficient method to copy unordered_maps
                previous_ip_to_id_map.insert(current_ip_to_id_map.begin(), current_ip_to_id_map.end());
            }
            std::cout << "Zorro: Constructing id to id map" << std::endl;

            // Map previous proc ids to new proc ids
            for (const auto& element : previous_ip_to_id_map) {
                const auto current = current_ip_to_id_map.find(element.first);
                id_to_previous_id[element.second] =
                    (current != current_ip_to_id_map.end()) ? current->second : -1;
            }

            for (size_t i = 0; i < id_to_previous_id.size(); ++i) {
                std::cout << "Previous id: " << i << " now has an id: " << id_to_previous_id[i] << std::endl;
            }

            std::cout << "Zorro: Initializing reactive server" << std::endl;
            rs = new graphlab::reactive_zorro<pagerank>();
            rs->init(&dc, &engine, iteration_number);
            rs->wait_for_join();

            std::vector< std::pair<size_t, bool> > server_list =
                extract_and_set_iteration(sync_engine_ptr, rs->get_received_servers());
            sync_engine_ptr->set_server_list(server_list);
            std::cout << "Zorro: All processes have joined: " << server_list.size() << std::endl;
            sync_engine_ptr->resize_received_state_container(dc.numprocs());
            dc.barrier();

            graphlab::thread_group tgroup;
            // Broadcast surviving state if not a replacement server
            if(iter > 0 && !replacement_server) {
                std::cout << "Zorro: Rebuilding state by requesting vertex data." << std::endl;
                if (proactive_replication) {
                    sync_engine_ptr->broadcast_surviving_state_replicas(tgroup, surviving_server_count, id_to_previous_id);
                } else {
                    sync_engine_ptr->broadcast_surviving_state(tgroup, surviving_server_count, id_to_previous_id);
                }
            }

            // Load the graph
            graph.load_format(graph_dir, format);
            // must call finalize before querying the graph
            graph.finalize();

            std::cout << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl;

            // Initialize the vertex data
            sync_engine_ptr->resize(); // Separately called to enable parallel graph loading with recovery
            graph.transform_vertices(init_vertex);

            // Wait for the state-broadcast threads before reading what they received.
            tgroup.join();
            std::vector<vid_vdata_vector_type> received_state;
            received_state = sync_engine_ptr->get_received_state();
            std::cout << "Zorro: Receive surviving state thread joined. Now merging." << std::endl;

            // Merge received state into primary graph state
            graphlab::thread_group tgroup_merge;
            for (size_t i = 0; i < received_state.size(); ++i) {
                tgroup_merge.launch(boost::bind(&merge_values, boost::ref(received_state[i]), boost::ref(graph)));
            }
            tgroup_merge.join();

            engine.signal_all();

            try {
                engine.start();
                rs->stop();

                const double runtime = engine.elapsed_seconds();
                std::cout << "Zorro: Completed Running engine in " << runtime << " seconds." << std::endl;

                if (ground_truth) {
                    write_results("/home/lamport/output_ground", graph); // write ground truth
                } else {
                    write_results("/home/lamport/output", graph); // write result with failures
                }

                // Save the final graph
                if (saveprefix != "") {
                    std::cout << "SAVING FILE" << std::endl;
                    graph.save(saveprefix, pagerank_writer(),
                               false,    // do not gzip
                               true,     // save vertices
                               false);   // do not save edges
                }
                // NOTE(review): rs is stopped but not deleted on this path.
                return;
            } catch (const graphlab::failure_exception&) {
                // Stash this attempt's graph state before the stack unwinds
                // and `graph` is destroyed; the next attempt hands it to its
                // engine as backup.
                l_graph = std::move(graph.get_local_graph());
                //vid2lvid_ref = std::move(graph.get_vid2lvid());
                lvid2record_ref = std::move(graph.get_lvid2record());

                if (proactive_replication) {
                    //replicas_vid2lvid_ref = std::move(graph.get_replicas_vid2lvid());
                    replicas_lvid2record_ref = std::move(graph.get_replicas_lvid2record());
                }
                iteration_number = sync_engine_ptr->iteration();

                // TODO: Figure out the most efficient method to copy unordered_maps
                previous_ip_to_id_map.clear();
                previous_ip_to_id_map.insert(current_ip_to_id_map.begin(), current_ip_to_id_map.end());
                current_ip_to_id_map.clear();
                // Rethrow the in-flight exception (no copy) to the outer handler.
                throw;
            }
        } catch (const graphlab::failure_exception&) {
            std::cout << "Zorro: Failed Running engine at iteration: " << iter << ". Recovering..." << std::endl;
            ++iter;
            continue;
        }
    }
}