int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("als-inmemory-factors"); lambda = get_option_float("lambda", 0.065); parse_command_line_args(); parse_implicit_command_line(); if (unittest == 1){ if (training == "") training = "test_wals"; niters = 100; } /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket4<edge_data>(training); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file); if (validation != ""){ int vshards = convert_matrixmarket4<EdgeDataType>(validation, false, M==N, VALIDATION, 0); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &wals_predict, true, false, 0); } if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } /* Run */ WALSVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_als_result(training); test_predictions(&wals_predict); if (unittest == 1){ if (dtraining_rmse > 0.03) logstream(LOG_FATAL)<<"Unit test 1 failed. Training RMSE is: " << training_rmse << std::endl; if (dvalidation_rmse > 0.61) logstream(LOG_FATAL)<<"Unit test 1 failed. Validation RMSE is: " << validation_rmse << std::endl; } /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD_WEIGHT); if (distance_metric != JACCARD_WEIGHT) logstream(LOG_FATAL)<<"--distance_metrix=XX should be one of:9= JACCARD_WEIGHT" << std::endl; debug = get_option_int("debug", 0); parse_command_line_args(); //if (distance_metric != JACKARD && distance_metric != AA && distance_metric != RA) // logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACKARD, 1) AA, 2) RA" << std::endl; mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true); assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); set_engine_flags(engine); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl; for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); std::cout<<"Created output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("als-inmemory-factors"); lambda = get_option_float("lambda", 0.065); user_sparsity = get_option_float("user_sparsity", 0.9); movie_sparsity = get_option_float("movie_sparsity", 0.9); algorithm = get_option_int("algorithm", SPARSE_USR_FACTOR); parse_command_line_args(); parse_implicit_command_line(); if (user_sparsity < 0.5 || user_sparsity >= 1) logstream(LOG_FATAL)<<"Sparsity level should be [0.5,1). Please run again using --user_sparsity=XX in this range" << std::endl; if (movie_sparsity < 0.5 || movie_sparsity >= 1) logstream(LOG_FATAL)<<"Sparsity level should be [0.5,1). Please run again using --movie_sparsity=XX in this range" << std::endl; if (algorithm != SPARSE_USR_FACTOR && algorithm != SPARSE_BOTH_FACTORS && algorithm != SPARSE_ITM_FACTOR) logstream(LOG_FATAL)<<"Algorithm should be 1 for SPARSE_USR_FACTOR, 2 for SPARSE_ITM_FACTOR and 3 for SPARSE_BOTH_FACTORS" << std::endl; /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<EdgeDataType>(training); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file); if (validation != ""){ int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &sparse_als_predict); } if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } /* Run */ ALSVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor 
matrices in matrix-market format */ output_als_result(training); test_predictions(&sparse_als_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("rbm-inmemory-factors"); /* Basic arguments for RBM algorithm */ rbm_bins = get_option_int("rbm_bins", rbm_bins); rbm_alpha = get_option_float("rbm_alpha", rbm_alpha); rbm_beta = get_option_float("rbm_beta", rbm_beta); rbm_mult_step_dec = get_option_float("rbm_mult_step_dec", rbm_mult_step_dec); rbm_scaling = get_option_float("rbm_scaling", rbm_scaling); parse_command_line_args(); parse_implicit_command_line(); mytimer.start(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<float>(training); rbm_init(); if (validation != "") { int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &rbm_predict); } /* load initial state from disk (optional) */ if (load_factors_from_file) { load_matrix_market_matrix(training + "_U.mm", 0, 3*D); load_matrix_market_matrix(training + "_V.mm", M, rbm_bins*(D+1)); } print_config(); /* Run */ RBMVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_rbm_result(training); test_predictions(&rbm_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */ beta = get_option_float("beta", 1); debug = get_option_int("debug", 0); parse_command_line_args(); parse_implicit_command_line(); D = 0; //no feature vector is needed binary_relevance_threshold = 0; //treat all edge values as binary /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file); //read initial vector from file std::cout << "Load CTR vector from file" << training << ":vec" << std::endl; load_matrix_market_vector(training + ":vec", Y_POS, false, false); mu_ij = zeros(M+N); sigma_ij = ones(M+N); if (validation != ""){ //read validation data (optional) vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false); validation_targets = load_matrix_market_vector(validation + ":vec", false, false); Me = validation_targets.size(); } print_config(); /* Run */ AdPredictorVerticesInMemProgram program; metrics m("adpredictor"); graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_adpredictor_result(training); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("itemsim2rating2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); debug = get_option_int("debug", 0); parse_command_line_args(); std::string similarity = get_option_string("similarity", ""); if (similarity == "") Rcpp::Rcerr<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl; undirected = get_option_int("undirected", 1); Q = get_option_float("Q", Q); K = get_option_int("K"); mytimer.start(); vec unused; int nshards = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, unused); assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. relevant_items = new bool[N]; /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); set_engine_flags(engine); out_file = open_file((training + "-rec").c_str(), "w"); //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); Rcpp::Rcout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl; if (zero_edges) Rcpp::Rcout<<"Found: " << zero_edges<< " user edges with weight zero. Those are ignored." <<std::endl; delete[] relevant_items; fclose(out_file); return 0; }
int main(int argc, const char ** argv) { //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("climf-inmemory-factors"); /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */ sgd_lambda = get_option_float("sgd_lambda", 1e-3); sgd_gamma = get_option_float("sgd_gamma", 1e-4); sgd_step_dec = get_option_float("sgd_step_dec", 1.0); binary_relevance_thresh = get_option_float("binary_relevance_thresh", 0); halt_on_mrr_decrease = get_option_int("halt_on_mrr_decrease", 0); num_ratings = get_option_int("num_ratings", 10000); //number of top predictions over which we compute actual MRR verbose = get_option_int("verbose", 0); debug = get_option_int("debug", 0); parse_command_line_args(); parse_implicit_command_line(); /* Preprocess data if needed, or discover preprocess files */ bool allow_square = false; int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, allow_square); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file, 0.01); if (validation != ""){ int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION); init_mrr_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards); } if (load_factors_from_file) { load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } print_config(); /* Run */ SGDVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_sgd_result(training); test_predictions(&climf_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("sgd-inmemory-factors"); /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */ sgd_lambda = get_option_float("sgd_lambda", 1e-3); sgd_gamma = get_option_float("sgd_gamma", 1e-3); sgd_step_dec = get_option_float("sgd_step_dec", 0.9); parse_command_line_args(); parse_implicit_command_line(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<EdgeDataType>(training, NULL, 0, 0, 3, TRAINING, false); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file); if (validation != ""){ int vshards = convert_matrixmarket<EdgeDataType>(validation, NULL, 0, 0, 3, VALIDATION, false); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &sgd_predict); } /* load initial state from disk (optional) */ if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } print_config(); /* Run */ SGDVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_sgd_result(training); test_predictions(&sgd_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); metrics m("nmf-inmemory-factors"); parse_command_line_args(); parse_implicit_command_line(); niters *= 2; //each NMF iteration is composed of two sub iters /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<float>(training, 0, 0, 3, TRAINING, false); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file); if (validation != ""){ int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false); if (vshards != -1) init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &nmf_predict); } if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } sum_of_item_latent_features = zeros(D); sum_of_user_latent_feautres = zeros(D); /* Run */ NMFVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_nmf_result(training); test_predictions(&nmf_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("sgd-inmemory-factors"); algorithm = get_option_string("algorithm", "global_mean"); if (algorithm == "global_mean") algo = GLOBAL_MEAN; else if (algorithm == "user_mean") algo = USER_MEAN; else if (algorithm == "item_mean") algo = ITEM_MEAN; else logstream(LOG_FATAL)<<"Unsupported algorithm name. Should be --algorithm=XX where XX is one of [global_mean,user_mean,item_mean] for example --algorithm=global_mean" << std::endl; parse_command_line_args(); mytimer.start(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<float>(training, NULL, 0, 0, 3, TRAINING, false); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, false); rmse_vec = zeros(number_of_omp_threads()); print_config(); /* Run */ BaselineVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, 1); if (algo == USER_MEAN || algo == ITEM_MEAN) output_baseline_result(training); test_predictions(&baseline_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("pmf-inmemory-factors"); lambda = get_option_float("lambda", 0.065); debug = get_option_int("debug", debug); pmf_burn_in = get_option_int("pmf_burn_in", pmf_burn_in); pmf_additional_output = get_option_int("pmf_additional_output", pmf_additional_output); parse_command_line_args(); parse_implicit_command_line(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<edge_data>(training, NULL, 0, 0, 3, TRAINING, false); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file); init_pmf(); if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } /* Run */ PMFVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine, true); pengine = &engine; engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD); asym_cosine_alpha = get_option_float("asym_cosine_alpha", 0.5); debug = get_option_int("debug", debug); if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE && distance_metric != PROB) logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0= JACCARD, 1= AA, 2= RA, 3= ASYM_COSINE, 4 = PROB" << std::endl; parse_command_line_args(); mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false); if (nshards != 1) logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl; K = get_option_int("K", K); if (K <= 0) logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl; logstream(LOG_INFO) << "M = " << M << std::endl; assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. 
relevant_items = new bool[N]; //store node degrees in an array to be used for AA distance metric if (distance_metric == AA || distance_metric == RA || distance_metric == PROB) latent_factors_inmem.resize(M); if (distance_metric == PROB) prob_sim_normalization_constant = (double)L / (double)(M*N-L); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, 1, true, m); set_engine_flags(engine); engine.set_maxwindow(M+N+1); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl; if (not_enough) logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl; for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); delete[] relevant_items; /* write the matrix market info header to be used later */ FILE * pmm = fopen((training + "-topk:info").c_str(), "w"); if (pmm == NULL) logstream(LOG_FATAL)<<"Failed to open " << training << ":info to file" << std::endl; fprintf(pmm, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(pmm, "%u %u %u\n", N, N, (unsigned int)sum(written_pairs)); fclose(pmm); /* sort output files */ logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl; std::string dname= dirname(strdup(argv[0])); system(("bash " + dname + "/topk.sh " + std::string(basename(strdup(training.c_str())))).c_str()); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("itemsim2rating2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); debug = get_option_int("debug", 0); parse_command_line_args(); std::string similarity = get_option_string("similarity", ""); if (similarity == "") logstream(LOG_FATAL)<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl; undirected = get_option_int("undirected", 0); mytimer.start(); int nshards = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, °rees); assert(M > 0 && N > 0); prob_sim_normalization_constant = (double)L / (double)(M*N-L); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. 
relevant_items = new bool[N]; /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); set_engine_flags(engine); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s-rec.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } K = get_option_int("K"); assert(K > 0); //run the program engine.run(program, niters); for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); delete[] relevant_items; /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << std::endl; logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl; std::string dname= dirname(strdup(argv[0])); system(("bash " + dname + "/topk.sh " + std::string(basename(strdup((training+"-rec").c_str())))).c_str()); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("libfm"); //specific command line parameters for libfm libfm_rate = get_option_float("libfm_rate", libfm_rate); libfm_regw = get_option_float("libfm_regw", libfm_regw); libfm_regv = get_option_float("libfm_regv", libfm_regv); libfm_mult_dec = get_option_float("libfm_mult_dec", libfm_mult_dec); D = get_option_int("D", D); parse_command_line_args(); parse_implicit_command_line(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket4<edge_data>(training, false); init_libfm(); if (validation != ""){ int vshards = convert_matrixmarket4<EdgeDataType>(validation, true, M==N, VALIDATION); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &libfm_predict, false, true, 1); } if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); load_matrix_market_matrix(training + "_T.mm", M+N, D); load_matrix_market_matrix(training + "_L.mm", M+N+K, D); vec user_bias = load_matrix_market_vector(training +"_U_bias.mm", false, true); vec item_bias = load_matrix_market_vector(training +"_V_bias.mm", false, true); vec time_bias = load_matrix_market_vector(training+ "_T_bias.mm", false, true); vec last_item_bias = load_matrix_market_vector(training+"_L_bias.m", false, true); for (uint i=0; i<M+N+K+M; i++){ if (i < M) latent_factors_inmem[i].bias = user_bias[i]; else if (i <M+N) latent_factors_inmem[i].bias = item_bias[i-M]; else if (i <M+N+K) latent_factors_inmem[i].bias = time_bias[i-M-N]; else latent_factors_inmem[i].bias = last_item_bias[i-M-N-K]; } vec gm = load_matrix_market_vector(training + "_global_mean.mm", false, true); globalMean = 
gm[0]; } /* Run */ LIBFMVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output test predictions in matrix-market format */ output_libfm_result(training); test_predictions3(&libfm_predict, 1); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("label_propagation"); alpha = get_option_float("alpha", alpha); debug = get_option_int("debug", debug); parse_command_line_args(); //load graph (adj matrix) from file int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true); if (M != N) logstream(LOG_FATAL)<<"Label propagation supports only square matrices" << std::endl; init_feature_vectors<std::vector<vertex_data> >(M, latent_factors_inmem, false); //load seed initialization from file load_matrix_market_matrix(training + ".seeds", 0, D); #pragma omp parallel for for (int i=0; i< (int)M; i++){ //normalize seed probabilities to sum up to one if (latent_factors_inmem[i].seed){ assert(sum(latent_factors_inmem[i].pvec) != 0); latent_factors_inmem[i].pvec /= sum(latent_factors_inmem[i].pvec); continue; } //other nodes get random label probabilities for (int j=0; j< D; j++) latent_factors_inmem[i].pvec[j] = drand48(); } /* load initial state from disk (optional) */ if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); } /* Run */ LPVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_lp_result(training); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("label_propagation"); contexts_file = get_option_string("contexts"); nouns_file = get_option_string("nouns"); pos_seeds = get_option_string("pos_seeds"); neg_seeds = get_option_string("neg_seeds"); parse_command_line_args(); load_map_from_txt_file(contexts.string2nodeid, contexts_file, 1); load_map_from_txt_file(nouns.string2nodeid, nouns_file, 1); //load graph (adj matrix) from file int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem); load_seeds_from_txt_file(nouns.string2nodeid, pos_seeds, false); load_seeds_from_txt_file(nouns.string2nodeid, neg_seeds, true); #pragma omp parallel for for (int i=0; i< (int)M; i++){ //normalize seed probabilities to sum up to one if (latent_factors_inmem[i].seed){ if (sum(latent_factors_inmem[i].pvec) != 0) latent_factors_inmem[i].pvec /= sum(latent_factors_inmem[i].pvec); continue; } //other nodes get random label probabilities for (int j=0; j< D; j++) latent_factors_inmem[i].pvec[j] = drand48(); } /* load initial state from disk (optional) */ if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, D); } /* Run */ COEMVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_coem_result(training); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { mytimer.start(); print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("rating2"); knn_sample_percent = get_option_float("knn_sample_percent", 1.0); if (knn_sample_percent <= 0 || knn_sample_percent > 1) logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl; num_ratings = get_option_int("num_ratings", 10); if (num_ratings <= 0) logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl; debug = get_option_int("debug", 0); tokens_per_row = get_option_int("tokens_per_row", tokens_per_row); std::string algorithm = get_option_string("algorithm"); /* Basic arguments for RBM algorithm */ rbm_bins = get_option_int("rbm_bins", rbm_bins); rbm_scaling = get_option_float("rbm_scaling", rbm_scaling); if (algorithm == "svdpp" || algorithm == "svd++") algo = SVDPP; else if (algorithm == "biassgd") algo = BIASSGD; else if (algorithm == "rbm") algo = RBM; else logstream(LOG_FATAL)<<"--algorithm should be svd++ or biassgd or rbm"<<std::endl; parse_command_line_args(); /* Preprocess data if needed, or discover preprocess files */ int nshards = 0; if (tokens_per_row == 3) nshards = convert_matrixmarket<edge_data>(training, 0, 0, 3, TRAINING, false); else if (tokens_per_row == 4) nshards = convert_matrixmarket4<edge_data4>(training); else logstream(LOG_FATAL)<<"--tokens_per_row should be either 3 or 4" << std::endl; assert(M > 0 && N > 0); latent_factors_inmem.resize(M+N); // Initialize in-memory vertices. 
//initialize data structure to hold the matrix read from file if (algo == RBM){ #pragma omp parallel for for (uint i=0; i< M+N; i++){ if (i < M){ latent_factors_inmem[i].pvec = zeros(D*3); } else { latent_factors_inmem[i].pvec = zeros(rbm_bins + rbm_bins * D); } } } read_factors(training); if ((uint)num_ratings > N){ logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl; num_ratings = N; } srand(time(NULL)); /* Run */ if (tokens_per_row == 3){ RatingVerticesInMemProgram<VertexDataType, EdgeDataType> program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); engine.run(program, 1); } else if (tokens_per_row == 4){ RatingVerticesInMemProgram<VertexDataType, edge_data4> program; graphchi_engine<VertexDataType, edge_data4> engine(training, nshards, false, m); set_engine_flags(engine); engine.run(program, 1); } /* Output latent factor matrices in matrix-market format */ output_knn_result(training); rating_stats(); if (users_without_ratings > 0) logstream(LOG_WARNING)<<"Found " << users_without_ratings << " without ratings. For those users no items are recommended (item id 0)" << std::endl; if (users_no_ratings > 0) logstream(LOG_WARNING)<<"Failed to compute ratings for " << users_no_ratings << " Users. For those users no items are recommended (item id 0)" << std::endl; /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD); asym_cosine_alpha = get_option_float("asym_cosine_alpha", 0.5); if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE) logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACCARD, 1) AA, 2) RA, 3) ASYM_COSINE" << std::endl; parse_command_line_args(); mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training/*, orderByDegreePreprocessor*/); if (nshards != 1) logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl; K = get_option_int("K", K); if (K <= 0) logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl; assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. 
relevant_items = new bool[N]; //store node degrees in an array to be used for AA distance metric if (distance_metric == AA || distance_metric == RA) latent_factors_inmem.resize(M); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); set_engine_flags(engine); engine.set_maxwindow(M+N+1); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl; if (not_enough) logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl; for (uint i=0; i< out_files.size(); i++){ fflush(out_files[i]); fclose(out_files[i]); } std::cout<<"Created " << number_of_omp_threads() << " output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; delete[] relevant_items; return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("bsvd_coor-inmemory-factors"); /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */ alpha = get_option_float("alpha", 1.0); lambda = get_option_float("lambda", 1.0); parse_command_line_args(); parse_implicit_command_line(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false); // initialize features vectors std::string initType; switch (init_features_type) { case 1: // bounded random // randomly initialize feature vectors so that rmin < rate < rmax initType = "bounded-random"; init_random_bounded<std::vector<vertex_data> >(latent_factors_inmem, !load_factors_from_file); break; case 2: // baseline initType = "baseline"; init_baseline<std::vector<vertex_data> >(latent_factors_inmem); load_matrix_market_matrix(training + "-baseline_P.mm", 0, D); load_matrix_market_matrix(training + "-baseline_Q.mm", M, D); break; case 3: // random initType = "random"; init_feature_vectors<std::vector<vertex_data> >(M + N, latent_factors_inmem, !load_factors_from_file); break; default: // random initType = "random"; init_feature_vectors<std::vector<vertex_data> >(M + N, latent_factors_inmem, !load_factors_from_file); } if (validation != "") { int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &bsvd_predict); } /* load initial state from disk (optional) */ if (load_factors_from_file) { load_matrix_market_matrix(training + "_U.mm", 0, D); load_matrix_market_matrix(training + "_V.mm", M, D); } /* Run */ ALSVerticesInMemProgram program; 
graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_als_result(training); test_predictions(&bsvd_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
/**
 * Entry point of the "sgd-inmemory-factors" application with a kd-tree
 * based top-N step.
 *
 * Trains with SGDVerticesInMemProgram for `niters` iterations, then builds a
 * kd-tree over the in-memory factors and runs the kd top-N program. One line
 * is appended to the file named by `result`, containing: D, the train timer
 * reading, the index timer reading and the test timer reading.
 *
 * The timers are declared immediately before the step they measure, so the
 * statement order below must be kept as is.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init()
 * @return always 0
 */
int main(int argc, const char ** argv) {
  // print_copyright();
  write_copyright();

  /* GraphChi initialization will read the command line
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("sgd-inmemory-factors");

  /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
  sgd_lambda = get_option_float("sgd_lambda", 1e-3);
  sgd_gamma = get_option_float("sgd_gamma", 1e-3);
  sgd_step_dec = get_option_float("sgd_step_dec", 0.9);
  /* Passed to convert_matrixmarket() for the training file below */
  int file_format = get_option_int("ff", 3);

  parse_command_line_args();
  parse_implicit_command_line();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, file_format, TRAINING, false);
  init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, !load_factors_from_file);
  if (validation != ""){
    /* NOTE(review): the validation file is converted with a fixed 3, not with file_format - confirm this is intended */
    int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false);
    init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &sgd_predict);
  }

  /* load initial state from disk (optional) */
  if (load_factors_from_file){
    load_matrix_market_matrix(training + "_U.mm", 0, D);
    load_matrix_market_matrix(training + "_V.mm", M, D);
  }

  print_config();

  /* Run */
  SGDVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m);
  set_engine_flags(engine);
  pengine = &engine;

  /* Training time: train_timer is declared right before the engine run */
  timer train_timer;
  engine.run(program, niters);
  // std::cout << "Trn Time for file test: " << std::setw(10) << train_timer.current_time() / niters << std::endl;

  /* Results are appended to the file named by `result`.
     NOTE(review): the stream is not checked for a failed open - confirm that is acceptable */
  std::ofstream ofs(result.c_str(), std::ofstream::out | std::ofstream::app);
  ofs << D << " " << train_timer.current_time() << " ";

  /* Run TopN program */
  /* NOTE(review): n_top is read from the option named "n_int" - confirm the option name is intended */
  n_top = get_option_int("n_int", 10);
  /*timer test_timer1;
  ofs << test_timer1.current_time() << " ";*/
  //run_general_topn_program(pengine, &latent_factors_inmem, &sgd_predict);

  /* construct kd tree index; index_timer is declared right before the build */
  timer index_timer;
  /* NOTE(review): mroot is not released in this function - confirm ownership of the tree */
  kd_Node* mroot = init_kdtree(&latent_factors_inmem);
  ofs << index_timer.current_time() << " ";

  /* Top-N query time: test_timer is declared right before the kd top-N run */
  timer test_timer;
  // ofs << "constructing index: " << test_timer.current_time() << " ";
  run_kd_topn_program(pengine, &latent_factors_inmem, mroot);
  // std::cout << "Tst Time: " << std::setw(10) << test_timer.current_time() << std::endl;
  ofs << test_timer.current_time() << std::endl;
  ofs.close();

  /* Output latent factor matrices in matrix-market format */
  output_sgd_result(training);
  test_predictions(&sgd_predict);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);

  return 0;
}
int main(int argc, const char ** argv) { print_copyright(); //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("svdpp-inmemory-factors"); svdpp.step_dec = get_option_float("svdpp_step_dec", 0.9); svdpp.itmBiasStep = get_option_float("svdpp_item_bias_step", 1e-3); svdpp.itmBiasReg = get_option_float("svdpp_item_bias_reg", 1e-3); svdpp.usrBiasStep = get_option_float("svdpp_user_bias_step", 1e-3); svdpp.usrBiasReg = get_option_float("svdpp_user_bias_reg", 1e-3); svdpp.usrFctrStep = get_option_float("svdpp_user_factor_step", 1e-3); svdpp.usrFctrReg = get_option_float("svdpp_user_factor_reg", 1e-3); svdpp.itmFctrReg = get_option_float("svdpp_item_factor_reg", 1e-3); svdpp.itmFctrStep = get_option_float("svdpp_item_factor_step", 1e-3); svdpp.itmFctr2Reg = get_option_float("svdpp_item_factor2_reg", 1e-3); svdpp.itmFctr2Step = get_option_float("svdpp_item_factor2_step", 1e-3); parse_command_line_args(); parse_implicit_command_line(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false); if (validation != ""){ int vshards = convert_matrixmarket<EdgeDataType>(validation, 0, 0, 3, VALIDATION, false); init_validation_rmse_engine<VertexDataType, EdgeDataType>(pvalidation_engine, vshards, &svdpp_predict); } svdpp_init(); if (load_factors_from_file){ load_matrix_market_matrix(training + "_U.mm", 0, 2*D); load_matrix_market_matrix(training + "_V.mm", M, D); vec user_bias = load_matrix_market_vector(training +"_U_bias.mm", false, true); assert(user_bias.size() == M); vec item_bias = load_matrix_market_vector(training +"_V_bias.mm", false, true); assert(item_bias.size() == N); for (uint i=0; i<M+N; i++){ latent_factors_inmem[i].bias = ((i<M)?user_bias[i] : item_bias[i-M]); } vec gm = 
load_matrix_market_vector(training + "_global_mean.mm", false, true); globalMean = gm[0]; } /* Run */ SVDPPVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, niters); /* Output latent factor matrices in matrix-market format */ output_svdpp_result(training); test_predictions(&svdpp_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }