示例#1
0
/**
 * Load a factor graph from the files named on the command line.
 *
 * The steps run in a fixed order: variables, factors and weights are read,
 * the three collections are sorted by id, the edges are read, and finally
 * the edge-based store is built and checked.
 *
 * @param cmd      parsed command line; supplies the weight, variable,
 *                 factor and edge file names
 * @param is_quiet when true, the "LOADED ..." progress lines are not
 *                 written to std::cout
 *
 * NOTE: the count and usability checks are plain assert()s, so they are
 * compiled out when NDEBUG is defined.
 */
void dd::FactorGraph::load(const CmdParser & cmd, const bool is_quiet) {

    // get factor graph file names from command line arguments
    // (one local per file; each is fetched once and used for its read_* call)
    std::string filename_weights = cmd.weight_file->getValue();
    std::string filename_variables = cmd.variable_file->getValue();
    std::string filename_factors = cmd.factor_file->getValue();
    std::string filename_edges = cmd.edge_file->getValue();

    // load variables
    long long n_loaded = read_variables(filename_variables, *this);
    assert(n_loaded == n_var);
    if (!is_quiet) {
        std::cout << "LOADED VARIABLES: #" << n_loaded << std::endl;
        std::cout << "         N_QUERY: #" << n_query << std::endl;
        std::cout << "         N_EVID : #" << n_evid << std::endl;
    }

    // load factors
    n_loaded = read_factors(filename_factors, *this);
    assert(n_loaded == n_factor);
    if (!is_quiet) {
        std::cout << "LOADED FACTORS: #" << n_loaded << std::endl;
    }

    // load weights
    n_loaded = read_weights(filename_weights, *this);
    assert(n_loaded == n_weight);
    if (!is_quiet) {
        std::cout << "LOADED WEIGHTS: #" << n_loaded << std::endl;
    }

    // sort the above components
    // NOTE This is very important, as read_edges assumes variables,
    // factors and weights are ordered so that their id is the index
    // where they are stored in the array
    this->sort_by_id();

    // load edges (no count assertion here, unlike the three loads above)
    n_loaded = read_edges(filename_edges, *this);
    if (!is_quiet) {
        std::cout << "LOADED EDGES: #" << n_loaded << std::endl;
    }

    // construct edge-based store
    this->organize_graph_by_edge();
    this->safety_check();

    assert(this->is_usable() == true);

}
示例#2
0
int main(int argc, const char ** argv) {

  mytimer.start();
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("rating2");

  knn_sample_percent = get_option_float("knn_sample_percent", 1.0);
  if (knn_sample_percent <= 0 || knn_sample_percent > 1)
    logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl;

  num_ratings   = get_option_int("num_ratings", 10);
  if (num_ratings <= 0)
    logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl;

  debug         = get_option_int("debug", 0);
  tokens_per_row = get_option_int("tokens_per_row", tokens_per_row);
  std::string algorithm     = get_option_string("algorithm");
  /* Basic arguments for RBM algorithm */
  rbm_bins      = get_option_int("rbm_bins", rbm_bins);
  rbm_scaling   = get_option_float("rbm_scaling", rbm_scaling);

  if (algorithm == "svdpp" || algorithm == "svd++")
    algo = SVDPP;
  else if (algorithm == "biassgd")
    algo = BIASSGD;
  else if (algorithm == "rbm")
    algo = RBM;
  else logstream(LOG_FATAL)<<"--algorithm should be svd++ or biassgd or rbm"<<std::endl;

  parse_command_line_args();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = 0;
  if (tokens_per_row == 3)
    nshards = convert_matrixmarket<edge_data>(training, 0, 0, 3, TRAINING, false);
  else if (tokens_per_row == 4)
    nshards = convert_matrixmarket4<edge_data4>(training);
  else logstream(LOG_FATAL)<<"--tokens_per_row should be either 3 or 4" << std::endl;

  assert(M > 0 && N > 0);
  latent_factors_inmem.resize(M+N); // Initialize in-memory vertices.

  //initialize data structure to hold the matrix read from file
  if (algo == RBM){
#pragma omp parallel for
    for (uint i=0; i< M+N; i++){
      if (i < M){
        latent_factors_inmem[i].pvec = zeros(D*3);
      }
      else {  
        latent_factors_inmem[i].pvec = zeros(rbm_bins + rbm_bins * D);
      }
    } 
  }
 
  read_factors(training);
  if ((uint)num_ratings > N){
    logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl;
    num_ratings = N;
  }
  srand(time(NULL));

  /* Run */
  if (tokens_per_row == 3){
    RatingVerticesInMemProgram<VertexDataType, EdgeDataType> program;
    graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
    set_engine_flags(engine);
    engine.run(program, 1);
  } 
  else if (tokens_per_row == 4){
    RatingVerticesInMemProgram<VertexDataType, edge_data4> program;
    graphchi_engine<VertexDataType, edge_data4> engine(training, nshards, false, m); 
    set_engine_flags(engine);
    engine.run(program, 1);
  }
  /* Output latent factor matrices in matrix-market format */
  output_knn_result(training);

  rating_stats();

  if (users_without_ratings > 0)
    logstream(LOG_WARNING)<<"Found " << users_without_ratings << " without ratings. For those users no items are recommended (item id 0)" << std::endl;

  if (users_no_ratings > 0)
    logstream(LOG_WARNING)<<"Failed to compute ratings for " << users_no_ratings << " Users. For those users no items are recommended (item id 0)" << std::endl;


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  return 0;
}