int main(int argc, const char ** argv) { print_copyright(); /* CE_Graph initialization will read the command line arguments and the configuration file. */ CE_Graph_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD_WEIGHT); if (distance_metric != JACCARD_WEIGHT) logstream(LOG_FATAL)<<"--distance_metrix=XX should be one of:9= JACCARD_WEIGHT" << std::endl; debug = get_option_int("debug", 0); parse_command_line_args(); //if (distance_metric != JACKARD && distance_metric != AA && distance_metric != RA) // logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACKARD, 1) AA, 2) RA" << std::endl; mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true); assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); /* Run */ ItemDistanceProgram program; CE_Graph_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); set_engine_flags(engine); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl; for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); std::cout<<"Created output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; return 0; }
void init_validation_rmse_engine(graphchi_engine<VertexDataType,EdgeDataType> *& pvalidation_engine, int nshards,float (*prediction_func)(const vertex_data & user, const vertex_data & movie, float rating, double & prediction, void * extra), bool _time_weighting, bool _time_nodes, int _matlab_time_offset){ metrics * m = new metrics("validation_rmse_engine"); graphchi_engine<VertexDataType, EdgeDataType> * engine = new graphchi_engine<VertexDataType, EdgeDataType>(validation, nshards, false, *m); set_engine_flags(*engine); pvalidation_engine = engine; time_weighting = _time_weighting; time_nodes = _time_nodes; matlab_time_offset = _matlab_time_offset; pprediction_func = prediction_func; num_threads = number_of_omp_threads(); }
int main(int argc, const char ** argv) { print_copyright(); //* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("sgd-inmemory-factors"); algorithm = get_option_string("algorithm", "global_mean"); if (algorithm == "global_mean") algo = GLOBAL_MEAN; else if (algorithm == "user_mean") algo = USER_MEAN; else if (algorithm == "item_mean") algo = ITEM_MEAN; else logstream(LOG_FATAL)<<"Unsupported algorithm name. Should be --algorithm=XX where XX is one of [global_mean,user_mean,item_mean] for example --algorithm=global_mean" << std::endl; parse_command_line_args(); mytimer.start(); /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<float>(training, NULL, 0, 0, 3, TRAINING, false); init_feature_vectors<std::vector<vertex_data> >(M+N, latent_factors_inmem, false); rmse_vec = zeros(number_of_omp_threads()); print_config(); /* Run */ BaselineVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); pengine = &engine; engine.run(program, 1); if (algo == USER_MEAN || algo == ITEM_MEAN) output_baseline_result(training); test_predictions(&baseline_predict); /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD); asym_cosine_alpha = get_option_float("asym_cosine_alpha", 0.5); debug = get_option_int("debug", debug); if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE && distance_metric != PROB) logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0= JACCARD, 1= AA, 2= RA, 3= ASYM_COSINE, 4 = PROB" << std::endl; parse_command_line_args(); mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false); if (nshards != 1) logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl; K = get_option_int("K", K); if (K <= 0) logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl; logstream(LOG_INFO) << "M = " << M << std::endl; assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. relevant_items = new bool[N]; //store node degrees in an array to be used for AA distance metric if (distance_metric == AA || distance_metric == RA || distance_metric == PROB) latent_factors_inmem.resize(M); if (distance_metric == PROB) prob_sim_normalization_constant = (double)L / (double)(M*N-L); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, 1, true, m); set_engine_flags(engine); engine.set_maxwindow(M+N+1); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl; if (not_enough) logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl; for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); delete[] relevant_items; /* write the matrix market info header to be used later */ FILE * pmm = fopen((training + "-topk:info").c_str(), "w"); if (pmm == NULL) logstream(LOG_FATAL)<<"Failed to open " << training << ":info to file" << std::endl; fprintf(pmm, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(pmm, "%u %u %u\n", N, N, (unsigned int)sum(written_pairs)); fclose(pmm); /* sort output files */ logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl; std::string dname= dirname(strdup(argv[0])); system(("bash " + dname + "/topk.sh " + std::string(basename(strdup(training.c_str())))).c_str()); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("itemsim2rating2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); debug = get_option_int("debug", 0); parse_command_line_args(); std::string similarity = get_option_string("similarity", ""); if (similarity == "") logstream(LOG_FATAL)<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl; undirected = get_option_int("undirected", 0); mytimer.start(); int nshards = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, °rees); assert(M > 0 && N > 0); prob_sim_normalization_constant = (double)L / (double)(M*N-L); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. relevant_items = new bool[N]; /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); set_engine_flags(engine); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s-rec.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } K = get_option_int("K"); assert(K > 0); //run the program engine.run(program, niters); for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); delete[] relevant_items; /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << std::endl; logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl; std::string dname= dirname(strdup(argv[0])); system(("bash " + dname + "/topk.sh " + std::string(basename(strdup((training+"-rec").c_str())))).c_str()); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD); asym_cosine_alpha = get_option_float("asym_cosine_alpha", 0.5); if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE) logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACCARD, 1) AA, 2) RA, 3) ASYM_COSINE" << std::endl; parse_command_line_args(); mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training/*, orderByDegreePreprocessor*/); if (nshards != 1) logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl; K = get_option_int("K", K); if (K <= 0) logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl; assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. relevant_items = new bool[N]; //store node degrees in an array to be used for AA distance metric if (distance_metric == AA || distance_metric == RA) latent_factors_inmem.resize(M); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); set_engine_flags(engine); engine.set_maxwindow(M+N+1); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl; if (not_enough) logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl; for (uint i=0; i< out_files.size(); i++){ fflush(out_files[i]); fclose(out_files[i]); } std::cout<<"Created " << number_of_omp_threads() << " output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; delete[] relevant_items; return 0; }