Example #1
0
int main(int argc, const char ** argv) {

  print_copyright();
 
  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("als-inmemory-factors");

  lambda        = get_option_float("lambda", 0.065);

  parse_command_line_args();
  parse_implicit_command_line();
  if (unittest == 1){
    if (training == "") training = "test_wals"; 
    niters = 100;
  }

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket4<edge_data>(training);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
  if (validation != ""){
    int vshards = convert_matrixmarket4<EdgeDataType>(validation, false, M==N, VALIDATION, 0);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &wals_predict, true, false, 0);
  }

  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }


  /* Run */
  WALSVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_als_result(training);
  test_predictions(&wals_predict);    

  if (unittest == 1){
    if (dtraining_rmse > 0.03)
      logstream(LOG_FATAL)<<"Unit test 1 failed. Training RMSE is: " << training_rmse << std::endl;
    if (dvalidation_rmse > 0.61)
      logstream(LOG_FATAL)<<"Unit test 1 failed. Validation RMSE is: " << validation_rmse << std::endl;

  }
 
  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  return 0;
}
Example #2
0
/**
 * Entry point of the "item-cf2" application (runs ItemDistanceProgram).
 *
 * Reads the application options, converts the training input, opens one
 * output file per OpenMP thread (<training>.out<i>), runs the engine for
 * `niters` iterations, reports metrics and closes the output files.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init()
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf2");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);

  distance_metric          = get_option_int("distance", JACCARD_WEIGHT);
  /* The option is read under the name "distance", so that is the flag the
     error message has to point the user to. */
  if (distance_metric != JACCARD_WEIGHT)
    logstream(LOG_FATAL)<<"--distance=XX should be one of:9= JACCARD_WEIGHT" << std::endl;
  debug                    = get_option_int("debug", 0);
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true);

  assert(M > 0 && N > 0);

  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); 
  set_engine_flags(engine);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    /* Bounded formatting: the training path comes from the command line and
       may be longer than the buffer. */
    int len = snprintf(buf, sizeof(buf), "%s.out%u", training.c_str(), i);
    if (len < 0 || len >= (int)sizeof(buf))
      logstream(LOG_FATAL)<<"Output file name is too long: " << training << ".out" << i << std::endl;
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl;

  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  std::cout<<"Created output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; 

  return 0;
}
Example #3
0
/**
 * Driver for the sparse ALS application: runs ALSVerticesInMemProgram and
 * uses sparse_als_predict as the prediction function.
 *
 * @param argc argument count
 * @param argv argument vector, handed to graphchi_init()
 * @return always 0
 */
int main(int argc, const char ** argv) {

  print_copyright();

  // Let GraphChi pick up the command line and its configuration file.
  graphchi_init(argc, argv);

  // Performance counters for this run; the engine needs a metrics object.
  metrics perf("als-inmemory-factors");

  // Application-specific options.
  lambda         = get_option_float("lambda", 0.065);
  user_sparsity  = get_option_float("user_sparsity", 0.9);
  movie_sparsity = get_option_float("movie_sparsity", 0.9);
  algorithm      = get_option_int("algorithm", SPARSE_USR_FACTOR);

  parse_command_line_args();
  parse_implicit_command_line();

  // Reject sparsity levels at or above 1, or below 0.5.
  if (user_sparsity >= 1 || user_sparsity < 0.5) {
    logstream(LOG_FATAL)<<"Sparsity level should be [0.5,1). Please run again using --user_sparsity=XX in this range" << std::endl;
  }
  if (movie_sparsity >= 1 || movie_sparsity < 0.5) {
    logstream(LOG_FATAL)<<"Sparsity level should be [0.5,1). Please run again using --movie_sparsity=XX in this range" << std::endl;
  }

  // Only the three sparse-factor modes are accepted.
  const bool known_algorithm = (algorithm == SPARSE_USR_FACTOR)
                            || (algorithm == SPARSE_BOTH_FACTORS)
                            || (algorithm == SPARSE_ITM_FACTOR);
  if (!known_algorithm) {
    logstream(LOG_FATAL)<<"Algorithm should be 1 for SPARSE_USR_FACTOR, 2 for SPARSE_ITM_FACTOR and 3 for SPARSE_BOTH_FACTORS" << std::endl;
  }

  // Shard the training input (or reuse existing preprocessed files).
  int training_shards = convert_matrixmarket<EdgeDataType>(training);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);

  // A validation set is optional; set up its RMSE engine only when given.
  if (validation != "") {
    int validation_shards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, validation_shards, &sparse_als_predict);
  }

  // Optionally start from factor matrices stored next to the training file.
  if (load_factors_from_file) {
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  // Execute the vertex program.
  ALSVerticesInMemProgram als_program;
  graphchi_engine<VertexDataType, EdgeDataType> als_engine(training, training_shards, false, perf);
  set_engine_flags(als_engine);
  pengine = &als_engine;
  als_engine.run(als_program, niters);

  // Write the factor matrices, then produce test predictions.
  output_als_result(training);
  test_predictions(&sparse_als_predict);

  // Metrics are printed unless quiet mode is on.
  if (!quiet) {
    metrics_report(perf);
  }
  return 0;
}
Example #4
0
int main(int argc, const char ** argv) {

    print_copyright();

    //* GraphChi initialization will read the command line arguments and the configuration file. */
    graphchi_init(argc, argv);

    /* Metrics object for keeping track of performance counters
       and other information. Currently required. */
    metrics m("rbm-inmemory-factors");

    /* Basic arguments for RBM algorithm */
    rbm_bins      = get_option_int("rbm_bins", rbm_bins);
    rbm_alpha     = get_option_float("rbm_alpha", rbm_alpha);
    rbm_beta      = get_option_float("rbm_beta", rbm_beta);
    rbm_mult_step_dec  = get_option_float("rbm_mult_step_dec", rbm_mult_step_dec);
    rbm_scaling   = get_option_float("rbm_scaling", rbm_scaling);

    parse_command_line_args();
    parse_implicit_command_line();

    mytimer.start();

    /* Preprocess data if needed, or discover preprocess files */
    int nshards = convert_matrixmarket<float>(training);

    rbm_init();

    if (validation != "") {
        int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION);
        init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &rbm_predict);
    }

    /* load initial state from disk (optional) */
    if (load_factors_from_file) {
        load_matrix_market_matrix(training + "_U.mm", 0, 3*D);
        load_matrix_market_matrix(training + "_V.mm", M, rbm_bins*(D+1));
    }

    print_config();

    /* Run */
    RBMVerticesInMemProgram program;
    graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m);
    set_engine_flags(engine);
    pengine = &engine;
    engine.run(program, niters);

    /* Output latent factor matrices in matrix-market format */
    output_rbm_result(training);
    test_predictions(&rbm_predict);


    /* Report execution metrics */
    if (!quiet)
        metrics_report(m);
    return 0;
}
Example #5
0
int main(int argc, const char ** argv) {

	print_copyright();

	//* GraphChi initialization will read the command line arguments and the configuration file. */
	graphchi_init(argc, argv);

	/* Metrics object for keeping track of performance counters
	   and other information. Currently required. */

	/* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
	beta       = get_option_float("beta", 1);
	debug      = get_option_int("debug", 0);

	parse_command_line_args();
	parse_implicit_command_line();
	D          = 0; //no feature vector is needed
	binary_relevance_threshold = 0; //treat all edge values as binary

	/* Preprocess data if needed, or discover preprocess files */
	int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false);
	init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
       
	//read initial vector from file 
	std::cout << "Load CTR vector from file" << training << ":vec" << std::endl;
	load_matrix_market_vector(training + ":vec", Y_POS, false, false);

        mu_ij    = zeros(M+N);
        sigma_ij = ones(M+N);

	if (validation != ""){
                //read validation data (optional)
		vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false);
		validation_targets = load_matrix_market_vector(validation + ":vec", false, false);                
                Me = validation_targets.size();
	}


	print_config();

	/* Run */
	AdPredictorVerticesInMemProgram program;
	metrics m("adpredictor");
	graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
        set_engine_flags(engine);
	pengine = &engine;
	engine.run(program, niters);

	/* Output latent factor matrices in matrix-market format */
	output_adpredictor_result(training);

	/* Report execution metrics */
	if (!quiet)
		metrics_report(m);

	return 0;
}
/**
 * Entry point of the "itemsim2rating2" application (runs
 * ItemDistanceProgram), Rcpp-stream variant.
 *
 * Reads the options, converts the training input together with the item
 * similarity file, runs the engine for `niters` iterations and writes to
 * "<training>-rec".
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init()
 * @return 0 on completion, 1 when no similarity file was specified
 */
int main(int argc, const char ** argv) {
    print_copyright();

    /* GraphChi initialization will read the command line
       arguments and the configuration file. */
    graphchi_init(argc, argv);

    /* Metrics object for keeping track of performance counters
       and other information. Currently required. */
    metrics m("itemsim2rating2");

    /* Basic arguments for application */
    min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
    debug                    = get_option_int("debug", 0);
    parse_command_line_args();
    std::string similarity   = get_option_string("similarity", "");
    if (similarity == "") {
        Rcpp::Rcerr<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl;
        /* Writing to Rcerr does not stop the program; without an input
           file there is nothing to convert, so bail out here. */
        return 1;
    }
    undirected               = get_option_int("undirected", 1);
    Q                        = get_option_float("Q", Q);
    K 			   = get_option_int("K");

    mytimer.start();
    vec unused;
    int nshards          = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, unused);

    assert(M > 0 && N > 0);

    //initialize data structure which saves a subset of the items (pivots) in memory
    adjcontainer = new adjlist_container();

    //array for marking which items are conected to the pivot items via users.
    relevant_items = new bool[N];

    /* Run */
    ItemDistanceProgram program;
    graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m);
    set_engine_flags(engine);

    out_file = open_file((training + "-rec").c_str(), "w");

    //run the program
    engine.run(program, niters);

    /* Report execution metrics */
    if (!quiet)
        metrics_report(m);

    Rcpp::Rcout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl;

    if (zero_edges)
        Rcpp::Rcout<<"Found: " << zero_edges<< " user edges with weight zero. Those are ignored." <<std::endl;

    delete[] relevant_items;
    fclose(out_file);
    return 0;
}
Example #7
0
int main(int argc, const char ** argv) {
  //* GraphChi initialization will read the command line arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("climf-inmemory-factors");

  /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
  sgd_lambda    = get_option_float("sgd_lambda", 1e-3);
  sgd_gamma     = get_option_float("sgd_gamma", 1e-4);
  sgd_step_dec  = get_option_float("sgd_step_dec", 1.0);
  binary_relevance_thresh = get_option_float("binary_relevance_thresh", 0);
  halt_on_mrr_decrease = get_option_int("halt_on_mrr_decrease", 0);
  num_ratings = get_option_int("num_ratings", 10000); //number of top predictions over which we compute actual MRR
  verbose     = get_option_int("verbose", 0);
  debug       = get_option_int("debug", 0);

  parse_command_line_args();
  parse_implicit_command_line();

  /* Preprocess data if needed, or discover preprocess files */
  bool allow_square = false;
  int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, allow_square);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file, 0.01);

  if (validation != ""){
    int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION);
    init_mrr_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards);
  }

  if (load_factors_from_file)
  {
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  print_config();

  /* Run */
  SGDVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m);
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_sgd_result(training);
  test_predictions(&climf_predict);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  return 0;
}
Example #8
0
int main(int argc, const char ** argv) {

  print_copyright();

  //* GraphChi initialization will read the command line arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("sgd-inmemory-factors");

  /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
  sgd_lambda    = get_option_float("sgd_lambda", 1e-3);
  sgd_gamma     = get_option_float("sgd_gamma", 1e-3);
  sgd_step_dec  = get_option_float("sgd_step_dec", 0.9);

  parse_command_line_args();
  parse_implicit_command_line();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<EdgeDataType>(training, NULL, 0, 0, 3, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
  if (validation != ""){
    int vshards = convert_matrixmarket<EdgeDataType>(validation, NULL, 0, 0, 3, VALIDATION, false);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &sgd_predict);
  }

  /* load initial state from disk (optional) */
  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  print_config();

  /* Run */
  SGDVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_sgd_result(training);
  test_predictions(&sgd_predict);    

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  return 0;
}
Example #9
0
int main(int argc, const char ** argv) {


  print_copyright(); 
 
  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);
  metrics m("nmf-inmemory-factors");

  parse_command_line_args();
  parse_implicit_command_line();

  niters *= 2; //each NMF iteration is composed of two sub iters

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<float>(training, 0, 0, 3, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
  if (validation != ""){
    int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false);
    if (vshards != -1)
       init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &nmf_predict);
  }
 
  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  sum_of_item_latent_features = zeros(D);
  sum_of_user_latent_feautres = zeros(D);

  /* Run */
  NMFVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_nmf_result(training);
  test_predictions(&nmf_predict);    

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);

  return 0;
}
Example #10
0
int main(int argc, const char ** argv) {

  print_copyright();

  //* GraphChi initialization will read the command line arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("sgd-inmemory-factors");

  algorithm     = get_option_string("algorithm", "global_mean");
  if (algorithm == "global_mean")
    algo = GLOBAL_MEAN;
  else if (algorithm == "user_mean")
    algo = USER_MEAN;
  else if (algorithm == "item_mean")
    algo = ITEM_MEAN;
  else logstream(LOG_FATAL)<<"Unsupported algorithm name. Should be --algorithm=XX where XX is one of [global_mean,user_mean,item_mean] for example --algorithm=global_mean" << std::endl;


  parse_command_line_args();
  mytimer.start();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<float>(training, NULL, 0, 0, 3, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, false);
  rmse_vec = zeros(number_of_omp_threads());
  print_config();

  /* Run */
  BaselineVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine); 
  pengine = &engine;
  engine.run(program, 1);

  if (algo == USER_MEAN || algo == ITEM_MEAN)
    output_baseline_result(training);
  test_predictions(&baseline_predict);    

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  return 0;
}
Example #11
0
int main(int argc, const char ** argv) {

  print_copyright();
 
  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("pmf-inmemory-factors");

  lambda        = get_option_float("lambda", 0.065);
  debug        = get_option_int("debug", debug);
  pmf_burn_in  = get_option_int("pmf_burn_in", pmf_burn_in);
  pmf_additional_output = get_option_int("pmf_additional_output", pmf_additional_output);
  
  parse_command_line_args();
  parse_implicit_command_line();


  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<edge_data>(training, NULL, 0, 0, 3, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
  init_pmf();

  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  /* Run */
  PMFVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine, true);
  pengine = &engine;
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);

  return 0;
}
Example #12
0
int main(int argc, const char ** argv) {

  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  distance_metric          = get_option_int("distance", JACCARD);
  asym_cosine_alpha        = get_option_float("asym_cosine_alpha", 0.5);
  debug                    = get_option_int("debug", debug);
  if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE && distance_metric != PROB)
    logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0= JACCARD, 1= AA, 2= RA, 3= ASYM_COSINE, 4 = PROB" << std::endl;  
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false);
  if (nshards != 1)
    logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl;
  K                        = get_option_int("K", K);
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;

 logstream(LOG_INFO) << "M = " << M << std::endl;
  assert(M > 0 && N > 0);
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();
  //array for marking which items are conected to the pivot items via users.
  relevant_items = new bool[N];

  //store node degrees in an array to be used for AA distance metric
  if (distance_metric == AA || distance_metric == RA || distance_metric == PROB)
    latent_factors_inmem.resize(M);
  if (distance_metric == PROB)
    prob_sim_normalization_constant = (double)L / (double)(M*N-L);


  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, 1, true, m); 
  set_engine_flags(engine);
  engine.set_maxwindow(M+N+1);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    sprintf(buf, "%s.out%d", training.c_str(), i);
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl;
  if (not_enough)
    logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl;
 
  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  delete[] relevant_items;

  /* write the matrix market info header to be used later */
  FILE * pmm = fopen((training + "-topk:info").c_str(), "w");
  if (pmm == NULL)
    logstream(LOG_FATAL)<<"Failed to open " << training << ":info to file" << std::endl;
  fprintf(pmm, "%%%%MatrixMarket matrix coordinate real general\n");
  fprintf(pmm, "%u %u %u\n", N, N, (unsigned int)sum(written_pairs));
  fclose(pmm);

  /* sort output files */
  logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl;
  std::string dname= dirname(strdup(argv[0]));
  system(("bash " + dname + "/topk.sh " + std::string(basename(strdup(training.c_str())))).c_str()); 

  return 0;
}
Example #13
0
/**
 * Entry point of the "itemsim2rating2" application (runs
 * ItemDistanceProgram).
 *
 * Reads the options, converts the training input together with the item
 * similarity file, opens one output file per OpenMP thread
 * (<training>-rec.out<i>), runs the engine and finally invokes topk.sh
 * (looked up next to the executable) on the output files.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; argv[0] is used to locate topk.sh
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("itemsim2rating2");    

  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  debug                    = get_option_int("debug", 0);
  parse_command_line_args();
  std::string similarity   = get_option_string("similarity", "");
  if (similarity == "")
    logstream(LOG_FATAL)<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl;
  undirected               = get_option_int("undirected", 0);
  
  mytimer.start();

  int nshards          = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, &degrees);

  assert(M > 0 && N > 0);
  /* Convert to double BEFORE multiplying: M*N evaluated in integer
     arithmetic can wrap around for large matrices. */
  prob_sim_normalization_constant = (double)L / ((double)M * (double)N - (double)L);
  
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();

  //array for marking which items are conected to the pivot items via users.
  relevant_items = new bool[N];

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); 
  set_engine_flags(engine);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    /* Bounded formatting: the training path comes from the command line and
       may be longer than the buffer. */
    int len = snprintf(buf, sizeof(buf), "%s-rec.out%u", training.c_str(), i);
    if (len < 0 || len >= (int)sizeof(buf))
      logstream(LOG_FATAL)<<"Output file name is too long: " << training << "-rec.out" << i << std::endl;
    out_files[i] = open_file(buf, "w");
  }


  K 			   = get_option_int("K");
  assert(K > 0);
  //run the program
  engine.run(program, niters);

  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);
  
  delete[] relevant_items;


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);

  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << std::endl;

  logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl;
  /* dirname()/basename() may modify their argument, hence the scratch
     copies; the results are copied into std::string before the scratch
     buffers are released. */
  char * argv0_copy = strdup(argv[0]);
  std::string dname = dirname(argv0_copy);
  free(argv0_copy);
  char * rec_copy = strdup((training+"-rec").c_str());
  std::string bname = basename(rec_copy);
  free(rec_copy);
  system(("bash " + dname + "/topk.sh " + bname).c_str()); 


  return 0;
}
Example #14
0
/**
 * Entry point of the libFM application (runs LIBFMVerticesInMemProgram and
 * uses libfm_predict as the prediction function).
 *
 * Reads the libfm_* options, converts the training input (and the
 * validation input when one is given), optionally restores factors, biases
 * and the global mean from files next to the training file, runs the
 * engine for `niters` iterations, writes the result with
 * output_libfm_result() and runs test_predictions3().
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init()
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {


  print_copyright();  

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("libfm");

  //specific command line parameters for libfm
  libfm_rate = get_option_float("libfm_rate", libfm_rate);
  libfm_regw = get_option_float("libfm_regw", libfm_regw);
  libfm_regv = get_option_float("libfm_regv", libfm_regv);
  libfm_mult_dec = get_option_float("libfm_mult_dec", libfm_mult_dec);
  D = get_option_int("D", D);

  parse_command_line_args();
  parse_implicit_command_line();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket4<edge_data>(training, false);
  init_libfm();
  if (validation != ""){
    int vshards = convert_matrixmarket4<EdgeDataType>(validation, true, M==N, VALIDATION);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &libfm_predict, false, true, 1);
  }


  /* load initial state from disk (optional) */
  if (load_factors_from_file){
    /* Factor matrices are loaded at consecutive offsets:
       U at 0, V at M, T at M+N, L at M+N+K. */
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
    load_matrix_market_matrix(training + "_T.mm", M+N, D);
    load_matrix_market_matrix(training + "_L.mm", M+N+K, D);
    vec user_bias =      load_matrix_market_vector(training +"_U_bias.mm", false, true);
    vec item_bias =      load_matrix_market_vector(training +"_V_bias.mm", false, true);
    vec time_bias =      load_matrix_market_vector(training+ "_T_bias.mm", false, true);
    /* Same ".mm" suffix as every other file in this group; the previous
       ".m" suffix did not match the naming scheme.
       NOTE(review): confirm against the name output_libfm_result() writes. */
    vec last_item_bias = load_matrix_market_vector(training+"_L_bias.mm", false, true);
    /* Scatter the four bias vectors over the matching index ranges. */
    for (uint i=0; i<M+N+K+M; i++){
      if (i < M)
        latent_factors_inmem[i].bias = user_bias[i];
      else if (i <M+N)
        latent_factors_inmem[i].bias = item_bias[i-M];
      else if (i <M+N+K)
        latent_factors_inmem[i].bias = time_bias[i-M-N];
      else 
        latent_factors_inmem[i].bias = last_item_bias[i-M-N-K];
    }
    vec gm = load_matrix_market_vector(training + "_global_mean.mm", false, true);
    globalMean = gm[0];
  }


  /* Run */
  LIBFMVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output test predictions in matrix-market format */
  output_libfm_result(training);
  test_predictions3(&libfm_predict, 1);    

  /* Report execution metrics */
  if (!quiet) 
    metrics_report(m);
  return 0;
}
/**
 * Entry point of the label propagation application (runs
 * LPVerticesInMemProgram).
 *
 * Loads a square adjacency matrix and the seed labels from
 * "<training>.seeds", normalizes the seed probabilities, gives every
 * non-seed node random label probabilities, runs the engine for `niters`
 * iterations and writes the result with output_lp_result().
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init()
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {

  print_copyright();
 
  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("label_propagation");

  alpha        = get_option_float("alpha", alpha);
  debug        = get_option_int("debug", debug);
  
  parse_command_line_args();


  //load graph (adj matrix) from file
  int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true);
  if (M != N)
    logstream(LOG_FATAL)<<"Label propagation supports only square matrices" << std::endl;

  init_feature_vectors<std::vector<vertex_data> >(M, latent_factors_inmem, false);
  
  //load seed initialization from file
  load_matrix_market_matrix(training + ".seeds", 0, D);

  //normalize seed probabilities to sum up to one (independent per node)
  #pragma omp parallel for
  for (int i=0; i< (int)M; i++){
    if (latent_factors_inmem[i].seed){
      assert(sum(latent_factors_inmem[i].pvec) != 0);
      latent_factors_inmem[i].pvec /= sum(latent_factors_inmem[i].pvec);
    }
  }

  //other nodes get random label probabilities. This loop is deliberately
  //serial: drand48() updates hidden shared generator state and is not
  //required to be thread-safe, so it must not run inside the parallel loop.
  for (int i=0; i< (int)M; i++){
    if (latent_factors_inmem[i].seed)
      continue;
    for (int j=0; j< D; j++)
       latent_factors_inmem[i].pvec[j] = drand48();
  }

  /* load initial state from disk (optional) */
  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
  }

  /* Run */
  LPVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_lp_result(training);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  return 0;
}
Example #16
0
/**
 * Entry point of the COEM application (runs COEMVerticesInMemProgram).
 *
 * Loads the context and noun maps from text files, converts the training
 * input, loads positive and negative seeds, normalizes the seed
 * probabilities, gives every non-seed node random label probabilities,
 * runs the engine for `niters` iterations and writes the result with
 * output_coem_result().
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init()
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {

  print_copyright();
 
  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("label_propagation");

  contexts_file = get_option_string("contexts");
  nouns_file = get_option_string("nouns"); 
  pos_seeds = get_option_string("pos_seeds");
  neg_seeds = get_option_string("neg_seeds");
  parse_command_line_args();

  load_map_from_txt_file(contexts.string2nodeid, contexts_file, 1);
  load_map_from_txt_file(nouns.string2nodeid, nouns_file, 1);
  //load graph (adj matrix) from file
  int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true);

  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem);

  load_seeds_from_txt_file(nouns.string2nodeid, pos_seeds, false);
  load_seeds_from_txt_file(nouns.string2nodeid, neg_seeds, true); 

  //normalize seed probabilities to sum up to one (independent per node);
  //seeds whose probabilities sum to zero are left untouched
#pragma omp parallel for
  for (int i=0; i< (int)M; i++){
    if (latent_factors_inmem[i].seed){
      if (sum(latent_factors_inmem[i].pvec) != 0)
        latent_factors_inmem[i].pvec /= sum(latent_factors_inmem[i].pvec);
    }
  }

  //other nodes get random label probabilities. This loop is deliberately
  //serial: drand48() updates hidden shared generator state and is not
  //required to be thread-safe, so it must not run inside the parallel loop.
  for (int i=0; i< (int)M; i++){
    if (latent_factors_inmem[i].seed)
      continue;
    for (int j=0; j< D; j++)
       latent_factors_inmem[i].pvec[j] = drand48();
  }

  /* load initial state from disk (optional) */
  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
  }

  /* Run */
  COEMVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_coem_result(training);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  return 0;
}
Example #17
0
int main(int argc, const char ** argv) {

  mytimer.start();
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("rating2");

  knn_sample_percent = get_option_float("knn_sample_percent", 1.0);
  if (knn_sample_percent <= 0 || knn_sample_percent > 1)
    logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl;

  num_ratings   = get_option_int("num_ratings", 10);
  if (num_ratings <= 0)
    logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl;

  debug         = get_option_int("debug", 0);
  tokens_per_row = get_option_int("tokens_per_row", tokens_per_row);
  std::string algorithm     = get_option_string("algorithm");
  /* Basic arguments for RBM algorithm */
  rbm_bins      = get_option_int("rbm_bins", rbm_bins);
  rbm_scaling   = get_option_float("rbm_scaling", rbm_scaling);

  if (algorithm == "svdpp" || algorithm == "svd++")
    algo = SVDPP;
  else if (algorithm == "biassgd")
    algo = BIASSGD;
  else if (algorithm == "rbm")
    algo = RBM;
  else logstream(LOG_FATAL)<<"--algorithm should be svd++ or biassgd or rbm"<<std::endl;

  parse_command_line_args();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = 0;
  if (tokens_per_row == 3)
    nshards = convert_matrixmarket<edge_data>(training, 0, 0, 3, TRAINING, false);
  else if (tokens_per_row == 4)
    nshards = convert_matrixmarket4<edge_data4>(training);
  else logstream(LOG_FATAL)<<"--tokens_per_row should be either 3 or 4" << std::endl;

  assert(M > 0 && N > 0);
  latent_factors_inmem.resize(M+N); // Initialize in-memory vertices.

  //initialize data structure to hold the matrix read from file
  if (algo == RBM){
#pragma omp parallel for
    for (uint i=0; i< M+N; i++){
      if (i < M){
        latent_factors_inmem[i].pvec = zeros(D*3);
      }
      else {  
        latent_factors_inmem[i].pvec = zeros(rbm_bins + rbm_bins * D);
      }
    } 
  }
 
  read_factors(training);
  if ((uint)num_ratings > N){
    logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl;
    num_ratings = N;
  }
  srand(time(NULL));

  /* Run */
  if (tokens_per_row == 3){
    RatingVerticesInMemProgram<VertexDataType, EdgeDataType> program;
    graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
    set_engine_flags(engine);
    engine.run(program, 1);
  } 
  else if (tokens_per_row == 4){
    RatingVerticesInMemProgram<VertexDataType, edge_data4> program;
    graphchi_engine<VertexDataType, edge_data4> engine(training, nshards, false, m); 
    set_engine_flags(engine);
    engine.run(program, 1);
  }
  /* Output latent factor matrices in matrix-market format */
  output_knn_result(training);

  rating_stats();

  if (users_without_ratings > 0)
    logstream(LOG_WARNING)<<"Found " << users_without_ratings << " without ratings. For those users no items are recommended (item id 0)" << std::endl;

  if (users_no_ratings > 0)
    logstream(LOG_WARNING)<<"Failed to compute ratings for " << users_no_ratings << " Users. For those users no items are recommended (item id 0)" << std::endl;


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  return 0;
}
Example #18
0
/**
 * Program entry point: reads the options, converts the training file into a
 * single shard, opens one output file per OpenMP thread, runs
 * ItemDistanceProgram and prints a summary of the compared item pairs.
 *
 * Invalid option values and a shard count other than 1 are reported through
 * logstream(LOG_FATAL).
 */
int main(int argc, const char ** argv) {

  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  distance_metric          = get_option_int("distance", JACCARD);
  asym_cosine_alpha        = get_option_float("asym_cosine_alpha", 0.5);
  // The message names the option that is actually read above ("distance").
  if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE)
    logstream(LOG_FATAL)<<"Wrong distance metric. --distance=XX, where XX should be either 0) JACCARD, 1) AA, 2) RA, 3) ASYM_COSINE" << std::endl;  
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training/*, orderByDegreePreprocessor*/);
  if (nshards != 1)
    logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl;
  K                        = get_option_int("K", K);
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;

  assert(M > 0 && N > 0);
  //initialize data structure which saves a subset of the items (pivots) in memory
  //NOTE(review): adjcontainer is not deleted anywhere in this function
  adjcontainer = new adjlist_container();
  //array for marking which items are connected to the pivot items via users;
  //zero-initialized so that no flag is ever read before it has been written
  relevant_items = new bool[N]();

  //store node degrees in an array to be used for AA distance metric
  if (distance_metric == AA || distance_metric == RA)
    latent_factors_inmem.resize(M);

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); 
  set_engine_flags(engine);
  engine.set_maxwindow(M+N+1);

  //open one output file per operating thread: <training>.out<thread index>
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    // Only the short numeric suffix goes through a fixed buffer; the full name
    // is assembled in a std::string so a long training path cannot overflow.
    char suffix[32];
    snprintf(suffix, sizeof(suffix), ".out%u", i);
    const std::string out_name = training + suffix;
    out_files[i] = open_file(out_name.c_str(), "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl;
  if (not_enough)
    logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl;
  for (uint i=0; i< out_files.size(); i++){
    fflush(out_files[i]);
    fclose(out_files[i]);
  }

  std::cout<<"Created "  << number_of_omp_threads() << " output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; 

  delete[] relevant_items;
  return 0;
}
Example #19
0
/**
 * Program entry point: reads the options, converts the training (and
 * optional validation) file into shards, initializes the feature vectors
 * according to init_features_type, runs ALSVerticesInMemProgram and writes
 * the result.
 */
int main(int argc, const char ** argv) {

	print_copyright();

	/* GraphChi initialization will read the command line
	 arguments and the configuration file. */
	graphchi_init(argc, argv);

	/* Metrics object for keeping track of performance counters
	 and other information. Currently required. */
	metrics m("bsvd_coor-inmemory-factors");

	/* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
	alpha = get_option_float("alpha", 1.0);
	lambda = get_option_float("lambda", 1.0);

	parse_command_line_args();
	parse_implicit_command_line();

	/* Preprocess data if needed, or discover preprocess files */
	int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false);

	/* Initialize the feature vectors */
	switch (init_features_type) {
	case 1: // bounded random
		// randomly initialize feature vectors so that rmin < rate < rmax
		init_random_bounded<std::vector<vertex_data> >(latent_factors_inmem, !load_factors_from_file);
		break;
	case 2: // baseline: initialize, then load the two baseline matrices from disk
		init_baseline<std::vector<vertex_data> >(latent_factors_inmem);
		load_matrix_market_matrix(training + "-baseline_P.mm", 0, D);
		load_matrix_market_matrix(training + "-baseline_Q.mm", M, D);
		break;
	case 3: // random
	default: // every other value is handled like 3
		init_feature_vectors<std::vector<vertex_data> >(M + N, latent_factors_inmem, !load_factors_from_file);
		break;
	}

	/* Optional validation data set */
	if (validation != "") {
		int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false);
		init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &bsvd_predict);
	}

	/* load initial state from disk (optional) */
	if (load_factors_from_file) {
		load_matrix_market_matrix(training + "_U.mm", 0, D);
		load_matrix_market_matrix(training + "_V.mm", M, D);
	}

	/* Run */
	ALSVerticesInMemProgram program;
	graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m);
	set_engine_flags(engine);
	pengine = &engine;
	engine.run(program, niters);

	/* Output latent factor matrices in matrix-market format */
	output_als_result(training);
	test_predictions(&bsvd_predict);

	/* Report execution metrics */
	if (!quiet)
		metrics_report(m);

	return 0;
}
int main(int argc, const char ** argv) {

 // print_copyright();
  write_copyright();
  //* GraphChi initialization will read the command line arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("sgd-inmemory-factors");

  /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
  sgd_lambda    = get_option_float("sgd_lambda", 1e-3);
  sgd_gamma     = get_option_float("sgd_gamma", 1e-3);
  sgd_step_dec  = get_option_float("sgd_step_dec", 0.9);

  int file_format   = get_option_int("ff", 3);


  parse_command_line_args();
  parse_implicit_command_line();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, file_format, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
  if (validation != ""){
    int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &sgd_predict);
  }

  /* load initial state from disk (optional) */
  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  print_config();

  /* Run */
  SGDVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;

  timer train_timer;
  engine.run(program, niters);
 // std::cout << "Trn Time for file test: " << std::setw(10) << train_timer.current_time() / niters << std::endl;

  std::ofstream ofs(result.c_str(), std::ofstream::out | std::ofstream::app);
  ofs << D << " " << train_timer.current_time() << " ";


  /* Run TopN program */
  n_top = get_option_int("n_int", 10);
  /*timer test_timer1;

  ofs << test_timer1.current_time() << " ";*/
  //run_general_topn_program(pengine, &latent_factors_inmem, &sgd_predict);
  timer index_timer;
  kd_Node* mroot = init_kdtree(&latent_factors_inmem);
  ofs << index_timer.current_time() << " ";
  timer test_timer;
  /* construct kd tree index */ 
//  ofs << "constructing index: " << test_timer.current_time() << " ";
  run_kd_topn_program(pengine, &latent_factors_inmem, mroot);

 // std::coua << "Tst Time: " << std::setw(10) << test_timer.current_time() << std::endl;
  ofs << test_timer.current_time() << std::endl;
  ofs.close();
  /* Output latent factor matrices in matrix-market format */
  output_sgd_result(training);
  test_predictions(&sgd_predict);    
  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  return 0;
}
Example #21
0
int main(int argc, const char ** argv) {

  print_copyright();

  //* GraphChi initialization will read the command line arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("svdpp-inmemory-factors");

  svdpp.step_dec  =   get_option_float("svdpp_step_dec", 0.9);
  svdpp.itmBiasStep  =   get_option_float("svdpp_item_bias_step", 1e-3);
  svdpp.itmBiasReg =   get_option_float("svdpp_item_bias_reg", 1e-3);
  svdpp.usrBiasStep  =   get_option_float("svdpp_user_bias_step", 1e-3);
  svdpp.usrBiasReg  =   get_option_float("svdpp_user_bias_reg", 1e-3);
  svdpp.usrFctrStep  =   get_option_float("svdpp_user_factor_step", 1e-3);
  svdpp.usrFctrReg  =   get_option_float("svdpp_user_factor_reg", 1e-3);
  svdpp.itmFctrReg =   get_option_float("svdpp_item_factor_reg", 1e-3);
  svdpp.itmFctrStep =   get_option_float("svdpp_item_factor_step", 1e-3);
  svdpp.itmFctr2Reg =   get_option_float("svdpp_item_factor2_reg", 1e-3);
  svdpp.itmFctr2Step =   get_option_float("svdpp_item_factor2_step", 1e-3);

  parse_command_line_args();
  parse_implicit_command_line();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false);
  if (validation != ""){
    int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &svdpp_predict);
  }

  svdpp_init();

  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, 2*D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
    vec user_bias = load_matrix_market_vector(training +"_U_bias.mm", false, true);
    assert(user_bias.size() == M);
    vec item_bias = load_matrix_market_vector(training +"_V_bias.mm", false, true);
    assert(item_bias.size() == N);
    for (uint i=0; i<M+N; i++){
      latent_factors_inmem[i].bias = ((i<M)?user_bias[i] : item_bias[i-M]);
    }
    vec gm = load_matrix_market_vector(training + "_global_mean.mm", false, true);
    globalMean = gm[0];
 }

  /* Run */
  SVDPPVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
  set_engine_flags(engine);
  pengine = &engine;
  engine.run(program, niters);

  /* Output latent factor matrices in matrix-market format */
  output_svdpp_result(training);
  test_predictions(&svdpp_predict);    


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  return 0;
}