示例#1
0
/**
 * Entry point of the "item-cf2" item-distance tool.
 *
 * Reads the application options, converts the training input into shards,
 * opens one output file per worker thread (named "<training>.out<i>"),
 * runs ItemDistanceProgram on the engine for `niters` iterations and
 * finally prints the pair counters.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to CE_Graph_init
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {
  print_copyright();

  /* CE_Graph initialization will read the command line 
     arguments and the configuration file. */
  CE_Graph_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf2");

  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);

  // Only JACCARD_WEIGHT is accepted by this tool. The option that is actually
  // read is "distance", so the message names that flag.
  distance_metric          = get_option_int("distance", JACCARD_WEIGHT);
  if (distance_metric != JACCARD_WEIGHT)
    logstream(LOG_FATAL)<<"--distance=XX should be one of:9= JACCARD_WEIGHT" << std::endl;
  debug                    = get_option_int("debug", 0);
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true);

  assert(M > 0 && N > 0);

  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();

  /* Run */
  ItemDistanceProgram program;
  CE_Graph_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); 
  set_engine_flags(engine);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    // Bounded formatting: the training path has arbitrary length, so an
    // unbounded sprintf could write past the end of buf.
    const int len = snprintf(buf, sizeof(buf), "%s.out%u", training.c_str(), i);
    if (len < 0 || static_cast<size_t>(len) >= sizeof(buf))
      logstream(LOG_FATAL)<<"Output file name is too long: " << training << ".out" << i << std::endl;
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl;

  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  std::cout<<"Created output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; 

  return 0;
}
示例#2
0
/**
 * Creates the engine used for validation RMSE and stores the validation
 * settings in the corresponding file-level variables.
 *
 * @param pvalidation_engine  out: receives the newly allocated engine
 * @param nshards             number of shards handed to the engine constructor
 * @param prediction_func     prediction callback, stored in pprediction_func
 * @param _time_weighting     stored in time_weighting
 * @param _time_nodes         stored in time_nodes
 * @param _matlab_time_offset stored in matlab_time_offset
 */
void init_validation_rmse_engine(graphchi_engine<VertexDataType,EdgeDataType> *& pvalidation_engine, int nshards,float (*prediction_func)(const vertex_data & user, const vertex_data & movie, float rating, double & prediction, void * extra), bool _time_weighting, bool _time_nodes, int _matlab_time_offset){
  typedef graphchi_engine<VertexDataType, EdgeDataType> validation_engine_t;

  // Both objects are heap allocated and not released here.
  // NOTE(review): the metrics object is presumably kept alive because the
  // engine refers to it for its whole lifetime - confirm.
  metrics * validation_metrics = new metrics("validation_rmse_engine");
  validation_engine_t * validation_engine =
      new validation_engine_t(validation, nshards, false, *validation_metrics);
  set_engine_flags(*validation_engine);

  // Hand the configured engine back to the caller.
  pvalidation_engine = validation_engine;

  // Remember the validation settings for later use.
  time_weighting     = _time_weighting;
  time_nodes         = _time_nodes;
  matlab_time_offset = _matlab_time_offset;
  pprediction_func   = prediction_func;
  num_threads        = number_of_omp_threads();
}
示例#3
0
/**
 * Entry point of the baseline predictor tool.
 *
 * Selects one of the global_mean / user_mean / item_mean algorithms from the
 * --algorithm option, converts the training input, runs
 * BaselineVerticesInMemProgram for a single iteration, writes the baseline
 * result for the per-user / per-item variants and computes test predictions.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {

  print_copyright();

  // GraphChi initialization will read the command line arguments
  // and the configuration file.
  graphchi_init(argc, argv);

  // Metrics object for keeping track of performance counters
  // and other information. Currently required.
  metrics m("sgd-inmemory-factors");

  // Translate the textual --algorithm option into its enum value.
  algorithm = get_option_string("algorithm", "global_mean");
  if (algorithm == "global_mean") {
    algo = GLOBAL_MEAN;
  }
  else if (algorithm == "user_mean") {
    algo = USER_MEAN;
  }
  else if (algorithm == "item_mean") {
    algo = ITEM_MEAN;
  }
  else {
    logstream(LOG_FATAL)<<"Unsupported algorithm name. Should be --algorithm=XX where XX is one of [global_mean,user_mean,item_mean] for example --algorithm=global_mean" << std::endl;
  }

  parse_command_line_args();
  mytimer.start();

  // Preprocess data if needed, or discover preprocess files.
  const int shard_count = convert_matrixmarket<float>(training, NULL, 0, 0, 3, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, false);
  rmse_vec = zeros(number_of_omp_threads());
  print_config();

  // Run a single pass of the baseline program.
  BaselineVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, shard_count, false, m);
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, 1);

  // Only the per-user and per-item variants write a baseline result.
  const bool per_entity_mean = (algo == USER_MEAN) || (algo == ITEM_MEAN);
  if (per_entity_mean) {
    output_baseline_result(training);
  }
  test_predictions(&baseline_predict);

  // Report execution metrics unless running quietly.
  if (!quiet) {
    metrics_report(m);
  }
  return 0;
}
示例#4
0
int main(int argc, const char ** argv) {

  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  distance_metric          = get_option_int("distance", JACCARD);
  asym_cosine_alpha        = get_option_float("asym_cosine_alpha", 0.5);
  debug                    = get_option_int("debug", debug);
  if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE && distance_metric != PROB)
    logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0= JACCARD, 1= AA, 2= RA, 3= ASYM_COSINE, 4 = PROB" << std::endl;  
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false);
  if (nshards != 1)
    logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl;
  K                        = get_option_int("K", K);
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;

 logstream(LOG_INFO) << "M = " << M << std::endl;
  assert(M > 0 && N > 0);
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();
  //array for marking which items are conected to the pivot items via users.
  relevant_items = new bool[N];

  //store node degrees in an array to be used for AA distance metric
  if (distance_metric == AA || distance_metric == RA || distance_metric == PROB)
    latent_factors_inmem.resize(M);
  if (distance_metric == PROB)
    prob_sim_normalization_constant = (double)L / (double)(M*N-L);


  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, 1, true, m); 
  set_engine_flags(engine);
  engine.set_maxwindow(M+N+1);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    sprintf(buf, "%s.out%d", training.c_str(), i);
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl;
  if (not_enough)
    logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl;
 
  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  delete[] relevant_items;

  /* write the matrix market info header to be used later */
  FILE * pmm = fopen((training + "-topk:info").c_str(), "w");
  if (pmm == NULL)
    logstream(LOG_FATAL)<<"Failed to open " << training << ":info to file" << std::endl;
  fprintf(pmm, "%%%%MatrixMarket matrix coordinate real general\n");
  fprintf(pmm, "%u %u %u\n", N, N, (unsigned int)sum(written_pairs));
  fclose(pmm);

  /* sort output files */
  logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl;
  std::string dname= dirname(strdup(argv[0]));
  system(("bash " + dname + "/topk.sh " + std::string(basename(strdup(training.c_str())))).c_str()); 

  return 0;
}
示例#5
0
/**
 * Entry point of the "itemsim2rating2" tool.
 *
 * Reads the application options (a similarity input file is mandatory),
 * converts the training and item-similarity inputs into shards, runs
 * ItemDistanceProgram for `niters` iterations with one output file per
 * worker thread ("<training>-rec.out<i>") and finally invokes topk.sh
 * (looked up next to the executable) to sort and merge the output files.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; argv[0] is used to locate topk.sh
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("itemsim2rating2");    

  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  debug                    = get_option_int("debug", 0);
  parse_command_line_args();
  std::string similarity   = get_option_string("similarity", "");
  if (similarity == "")
    logstream(LOG_FATAL)<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl;
  undirected               = get_option_int("undirected", 0);
  
  mytimer.start();

  int nshards          = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, &degrees);

  assert(M > 0 && N > 0);
  // Every operand is converted to double before multiplying: evaluating
  // M*N in the integer type of M and N (declared elsewhere) can wrap around
  // for large matrices and yield a wrong constant.
  prob_sim_normalization_constant = static_cast<double>(L) /
      (static_cast<double>(M) * static_cast<double>(N) - static_cast<double>(L));
  
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();

  //array for marking which items are conected to the pivot items via users.
  relevant_items = new bool[N];

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); 
  set_engine_flags(engine);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    // Bounded formatting: the training path has arbitrary length, so an
    // unbounded sprintf could write past the end of buf.
    const int len = snprintf(buf, sizeof(buf), "%s-rec.out%u", training.c_str(), i);
    if (len < 0 || static_cast<size_t>(len) >= sizeof(buf))
      logstream(LOG_FATAL)<<"Output file name is too long: " << training << "-rec.out" << i << std::endl;
    out_files[i] = open_file(buf, "w");
  }


  K 			   = get_option_int("K");
  assert(K > 0);
  //run the program
  engine.run(program, niters);

  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);
  
  delete[] relevant_items;


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);

  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << std::endl;

  logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl;
  // dirname()/basename() may modify their argument, so they operate on
  // private std::string copies; this replaces the leaked strdup() buffers.
  std::string exe_path(argv[0]);
  std::string rec_path(training + "-rec");
  const std::string dname = dirname(&exe_path[0]);
  const std::string rec_base = basename(&rec_path[0]);
  // NOTE(review): the paths are passed to the shell unquoted, so names
  // containing spaces or shell metacharacters will not work as intended.
  system(("bash " + dname + "/topk.sh " + rec_base).c_str()); 


  return 0;
}
示例#6
0
/**
 * Entry point of the "item-cf" item-similarity tool.
 *
 * Reads the application options, converts the training input (exactly one
 * shard is supported), runs ItemDistanceProgram for `niters` iterations with
 * one output file per worker thread ("<training>.out<i>") and prints the
 * pair counters.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {

  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  distance_metric          = get_option_int("distance", JACCARD);
  asym_cosine_alpha        = get_option_float("asym_cosine_alpha", 0.5);
  // The option that is actually read is "distance", so the message names that flag.
  if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE)
    logstream(LOG_FATAL)<<"Wrong distance metric. --distance=XX, where XX should be either 0) JACCARD, 1) AA, 2) RA, 3) ASYM_COSINE" << std::endl;  
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training/*, orderByDegreePreprocessor*/);
  if (nshards != 1)
    logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl;
  K                        = get_option_int("K", K);
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;

  assert(M > 0 && N > 0);
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();
  //array for marking which items are conected to the pivot items via users.
  relevant_items = new bool[N];

  //store node degrees in an array to be used for AA distance metric
  if (distance_metric == AA || distance_metric == RA)
    latent_factors_inmem.resize(M);

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); 
  set_engine_flags(engine);
  engine.set_maxwindow(M+N+1);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    // Bounded formatting: the training path has arbitrary length, so an
    // unbounded sprintf could write past the end of buf.
    const int len = snprintf(buf, sizeof(buf), "%s.out%u", training.c_str(), i);
    if (len < 0 || static_cast<size_t>(len) >= sizeof(buf))
      logstream(LOG_FATAL)<<"Output file name is too long: " << training << ".out" << i << std::endl;
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl;
  if (not_enough)
    logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl;
  for (uint i=0; i< out_files.size(); i++){
    fflush(out_files[i]);
    fclose(out_files[i]);
  }

  std::cout<<"Created "  << number_of_omp_threads() << " output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; 

  delete[] relevant_items;
  return 0;
}