void dd::FactorGraph::load(const CmdParser & cmd, const bool is_quiet) { // get factor graph file names from command line arguments std::string weight_file = cmd.weight_file->getValue(); std::string variable_file = cmd.variable_file->getValue(); std::string factor_file = cmd.factor_file->getValue(); std::string edge_file = cmd.edge_file->getValue(); std::string filename_edges = edge_file; std::string filename_factors = factor_file; std::string filename_variables = variable_file; std::string filename_weights = weight_file; // load variables long long n_loaded = read_variables(filename_variables, *this); assert(n_loaded == n_var); if (!is_quiet) { std::cout << "LOADED VARIABLES: #" << n_loaded << std::endl; std::cout << " N_QUERY: #" << n_query << std::endl; std::cout << " N_EVID : #" << n_evid << std::endl; } // load factors n_loaded = read_factors(filename_factors, *this); assert(n_loaded == n_factor); if (!is_quiet) { std::cout << "LOADED FACTORS: #" << n_loaded << std::endl; } // load weights n_loaded = read_weights(filename_weights, *this); assert(n_loaded == n_weight); if (!is_quiet) { std::cout << "LOADED WEIGHTS: #" << n_loaded << std::endl; } // sort the above components // NOTE This is very important, as read_edges assume variables, // factors and weights are ordered so that their id is the index // where they are stored in the array this->sort_by_id(); // load edges n_loaded = read_edges(edge_file, *this); if (!is_quiet) { std::cout << "LOADED EDGES: #" << n_loaded << std::endl; } // construct edge-based store this->organize_graph_by_edge(); this->safety_check(); assert(this->is_usable() == true); }
int main(int argc, const char ** argv) { mytimer.start(); print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("rating2"); knn_sample_percent = get_option_float("knn_sample_percent", 1.0); if (knn_sample_percent <= 0 || knn_sample_percent > 1) logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl; num_ratings = get_option_int("num_ratings", 10); if (num_ratings <= 0) logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl; debug = get_option_int("debug", 0); tokens_per_row = get_option_int("tokens_per_row", tokens_per_row); std::string algorithm = get_option_string("algorithm"); /* Basic arguments for RBM algorithm */ rbm_bins = get_option_int("rbm_bins", rbm_bins); rbm_scaling = get_option_float("rbm_scaling", rbm_scaling); if (algorithm == "svdpp" || algorithm == "svd++") algo = SVDPP; else if (algorithm == "biassgd") algo = BIASSGD; else if (algorithm == "rbm") algo = RBM; else logstream(LOG_FATAL)<<"--algorithm should be svd++ or biassgd or rbm"<<std::endl; parse_command_line_args(); /* Preprocess data if needed, or discover preprocess files */ int nshards = 0; if (tokens_per_row == 3) nshards = convert_matrixmarket<edge_data>(training, 0, 0, 3, TRAINING, false); else if (tokens_per_row == 4) nshards = convert_matrixmarket4<edge_data4>(training); else logstream(LOG_FATAL)<<"--tokens_per_row should be either 3 or 4" << std::endl; assert(M > 0 && N > 0); latent_factors_inmem.resize(M+N); // Initialize in-memory vertices. //initialize data structure to hold the matrix read from file if (algo == RBM){ #pragma omp parallel for for (uint i=0; i< M+N; i++){ if (i < M){ latent_factors_inmem[i].pvec = zeros(D*3); } else { latent_factors_inmem[i].pvec = zeros(rbm_bins + rbm_bins * D); } } } read_factors(training); if ((uint)num_ratings > N){ logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl; num_ratings = N; } srand(time(NULL)); /* Run */ if (tokens_per_row == 3){ RatingVerticesInMemProgram<VertexDataType, EdgeDataType> program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); engine.run(program, 1); } else if (tokens_per_row == 4){ RatingVerticesInMemProgram<VertexDataType, edge_data4> program; graphchi_engine<VertexDataType, edge_data4> engine(training, nshards, false, m); set_engine_flags(engine); engine.run(program, 1); } /* Output latent factor matrices in matrix-market format */ output_knn_result(training); rating_stats(); if (users_without_ratings > 0) logstream(LOG_WARNING)<<"Found " << users_without_ratings << " without ratings. For those users no items are recommended (item id 0)" << std::endl; if (users_no_ratings > 0) logstream(LOG_WARNING)<<"Failed to compute ratings for " << users_no_ratings << " Users. For those users no items are recommended (item id 0)" << std::endl; /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }