int diagonalize_bisection(localized_matrix<double, MATRIX_MAJOR>& mata, localized_matrix<double, MATRIX_MAJOR>& matb, double* eigvals, rokko::parameters const& params, timer& timer) { rokko::parameters params_out; char jobz = 'N'; // only eigenvalues int dim = mata.innerSize(); int lda = mata.outerSize(); int ldb = matb.outerSize(); lapack_int m; // output: found eigenvalues double abstol; get_key(params, "abstol", abstol); if (abstol < 0) { std::cerr << "Error in diagonalize_bisection" << std::endl << "abstol is negative value, which means QR method." << std::endl << "To use dsygvx as bisection solver, set abstol a positive value" << std::endl; throw; } if (!params.defined("abstol")) { // default: optimal value for bisection method abstol = 2 * LAPACKE_dlamch('S'); } params_out.set("abstol", abstol); char uplow = get_matrix_part(params); lapack_int il, iu; double vl, vu; char range = get_eigenvalues_range(params, vl, vu, il, iu); std::vector<lapack_int> ifail(dim); timer.start(timer_id::diagonalize_diagonalize); int info; if(mata.is_col_major()) info = LAPACKE_dsygvx(LAPACK_COL_MAJOR, 1, jobz, range, uplow, dim, &mata(0,0), lda, &matb(0,0), ldb, vl, vu, il, iu, abstol, &m, eigvals, NULL, lda, &ifail[0]); else info = LAPACKE_dsygvx(LAPACK_ROW_MAJOR, 1, jobz, range, uplow, dim, &mata(0,0), lda, &matb(0,0), ldb, vl, vu, il, iu, abstol, &m, eigvals, NULL, lda, &ifail[0]); timer.stop(timer_id::diagonalize_diagonalize); timer.start(timer_id::diagonalize_finalize); if (info) { std::cerr << "error at dsygvx function. info=" << info << std::endl; if (info < 0) { std::cerr << "This means that "; std::cerr << "the " << abs(info) << "-th argument had an illegal value." << std::endl; } exit(1); } params_out.set("m", m); params_out.set("ifail", ifail); if (params.get_bool("verbose")) { print_verbose("dsygvx (bisection)", jobz, range, uplow, vl, vu, il, iu, params_out); } timer.stop(timer_id::diagonalize_finalize); return info; }
bool ripng_router::check_startup() { if (!router::check_startup()) return false; int sock = socket(PF_INET6, SOCK_DGRAM, 0); if (sock < 0) return false; sockaddr_in6 local; memset(&local, 0, sizeof(local)); local.sin6_family = AF_INET6; local.sin6_port = htons(522); if (bind(sock, (sockaddr *)&local, sizeof(local)) < 0) { if (should_log(WARNING)) log().perror("Failed to bind"); close(sock); return false; } if (!m_sock.register_fd(sock)) { close(sock); return false; } if (!m_sock.enable_mc_loop(false)) return false; g_mrd->mrib().install_listener(this); m_garbcol_timer.start(); return true; }
int main(int argc, const char ** argv) { print_copyright(); /* CE_Graph initialization will read the command line arguments and the configuration file. */ CE_Graph_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD_WEIGHT); if (distance_metric != JACCARD_WEIGHT) logstream(LOG_FATAL)<<"--distance_metrix=XX should be one of:9= JACCARD_WEIGHT" << std::endl; debug = get_option_int("debug", 0); parse_command_line_args(); //if (distance_metric != JACKARD && distance_metric != AA && distance_metric != RA) // logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACKARD, 1) AA, 2) RA" << std::endl; mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true); assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); /* Run */ ItemDistanceProgram program; CE_Graph_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); set_engine_flags(engine); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl; for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); std::cout<<"Created output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("itemsim2rating2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); debug = get_option_int("debug", 0); parse_command_line_args(); std::string similarity = get_option_string("similarity", ""); if (similarity == "") Rcpp::Rcerr<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl; undirected = get_option_int("undirected", 1); Q = get_option_float("Q", Q); K = get_option_int("K"); mytimer.start(); vec unused; int nshards = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, unused); assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. relevant_items = new bool[N]; /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); set_engine_flags(engine); out_file = open_file((training + "-rec").c_str(), "w"); //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); Rcpp::Rcout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl; if (zero_edges) Rcpp::Rcout<<"Found: " << zero_edges<< " user edges with weight zero. Those are ignored." <<std::endl; delete[] relevant_items; fclose(out_file); return 0; }
/**
 * Prepare one SQL statement, step through it until sqlite3_step stops
 * reporting SQLITE_ROW, and finalize it. The whole sequence is bracketed by
 * t.start() / t.end(); every SQLite return code is passed through checkErr.
 */
void prepareAndRun(sqlite3 *db, string stmt, timer &t) {
	sqlite3_stmt *prepared;

	t.start();

	checkErr(sqlite3_prepare_v2(db, stmt.c_str(), -1, &prepared, NULL), __LINE__, db, stmt);

	// Discard result rows; only the execution itself matters here.
	for (;;) {
		if (checkErr(sqlite3_step(prepared), __LINE__, db, stmt) != SQLITE_ROW)
			break;
	}

	checkErr(sqlite3_finalize(prepared), __LINE__, db, stmt);

	t.end();
}
/**
 * Estimates a distribution over vertices by running N random walks from the
 * start vertex ("-r"), then feeds the non-zero entries to sweepCut and prints
 * the resulting statistics.
 *
 * Options read from P:
 *   -r  start vertex (default 0)
 *   -K  number of walk-length probabilities, i.e. the maximum number of
 *       steps a walk can take (default 10)
 *   -N  number of walks (default 10)
 *   -t  parameter of the walk-length weights exp(-t) * t^k / k! (default 3)
 *
 * Returns early, after printing a message, when the start vertex has
 * out-degree 0.
 */
void Compute(graph<vertex>& GA, commandLine P) {
  t1.start();
  long start = P.getOptionLongValue("-r",0);
  if(GA.V[start].getOutDegree() == 0) {
    cout << "starting vertex has degree 0" << endl;
    return;
  }
  const uintE K = P.getOptionIntValue("-K",10);
  const uintE N = P.getOptionIntValue("-N",10);
  const double t = P.getOptionDoubleValue("-t",3);
  srand (time(NULL));
  uintE seed = rand();

  //walk length probabilities
  double* fact = newA(double,K);
  if(K > 0) fact[0] = 1;  // guard: with K == 0 the array has no element 0
  for(long k=1;k<K;k++) fact[k] = k*fact[k-1];
  double* probs = newA(double,K);
  for(long k=0;k<K;k++) probs[k] = exp(-t)*pow(t,k)/fact[k];

  unordered_map<uintE,double> p;
  for(long i=0;i<N;i++) {
    double randDouble = (double) hashInt(seed++) / UINT_E_MAX;
    double mass = 0;
    uintE x = start;
    // probs has exactly K entries, so the index must stay below K; the
    // previous `j <= K` bound read one element past the end of the array.
    for(long j=0;j<K;j++) {
      mass += probs[j];
      if(randDouble < mass) break;
      x = walk(x,GA.V,seed++);
    }
    p[x]++;
  }
  //normalize visit counts by the number of walks
  for(auto it=p.begin();it!=p.end();it++) {
    it->second /= N;
  }

  free(probs); free(fact);
  t1.stop();

  //copy the non-zero entries into a flat array for the sweep
  pairIF* A = newA(pairIF,p.size());
  long numNonzerosQ = 0;
  for(auto it = p.begin(); it != p.end(); it++) {
    A[numNonzerosQ++] = make_pair(it->first,it->second);
  }
  sweepObject sweep = sweepCut(GA,A,numNonzerosQ,start);
  free(A);
  cout << "number of vertices touched = " << p.size() << endl;
  cout << "number of edges touched = " << sweep.vol << endl;
  cout << "conductance = " << sweep.conductance << " |S| = " << sweep.sizeS << " vol(S) = " << sweep.volS << " edgesCrossing = " << sweep.edgesCrossing << endl;
  t1.reportTotal("computation time");
}
int shrimp_gateway_impl::read_reply() { m_timer_status = 0; m_io_timer.start(); char* r = new char[10]; int rd = m_shrimp.Readv(r, 1, &m_timer_status); //printf("r: %i\n", r[0]); m_reply = r[0]; //printf("byte read %i\n", rd); return rd; }
int shrimp_gateway_impl::send_command(const shrimp_command_t& command) { m_timer_status = 0; m_io_timer.start(); unsigned char* cmd = new unsigned char[3]; memcpy(cmd, command.to_buf(), command.get_size()); //printf("%i %i %i", cmd[0], cmd[1], cmd[2]); int wd = m_shrimp.Writev(command.to_buf(), command.get_size(), &m_timer_status); //printf("bytes sent %i\n", wd); return wd; }
int main(int argc, const char *argv[]) { logstream(LOG_WARNING)<<"CE_Graph parsers library is written by Danny Bickson (c). Send any " " comments or bug reports to [email protected] " << std::endl; global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); CE_Graph_init(argc, argv); debug = get_option_int("debug", 0); dir = get_option_string("file_list"); lines = get_option_int("lines", 0); omp_set_num_threads(get_option_int("ncpus", 1)); mytime.start(); FILE * f = fopen(dir.c_str(), "r"); if (f == NULL) logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl; while(true){ char buf[256]; int rc = fscanf(f, "%s\n", buf); if (rc < 1) break; in_files.push_back(buf); } if (in_files.size() == 0) logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl; #pragma omp parallel for for (uint i=0; i< in_files.size(); i++) parse(i); std::cout << "Finished in " << mytime.current_time() << std::endl << "\t direct tweets found: " << links_found << " \t global tweets: " << wide_tweets << "\t http links: " << http_links << "\t retweets: " << retweet_found << "\t total lines in input file : " << total_lines << " \t invalid records (missing names) " << missing_names << std::endl; save_map_to_text_file(string2nodeid, outdir + "map.text"); save_map_to_text_file(nodeid2hash, outdir + "reverse.map.text"); save_map_to_text_file(tweets_per_user, outdir + "tweets_per_user.text"); out_file fout("mm.info"); fprintf(fout.outf, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(fout.outf, "%u %u %lu\n", maxfrom+1, maxto+1, links_found); return 0; }
int main(int argc, const char *argv[]) { logstream(LOG_WARNING)<<"GraphChi parsers library is written by Danny Bickson (c). Send any " " comments or bug reports to [email protected] " << std::endl; global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); graphchi_init(argc, argv); debug = get_option_int("debug", 0); dir = get_option_string("file_list"); lines = get_option_int("lines", 0); omp_set_num_threads(get_option_int("ncpus", 1)); from_val = get_option_int("from_val", from_val); to_val = get_option_int("to_val", to_val); mid_val = get_option_int("mid_val", mid_val); if (from_val == -1) logstream(LOG_FATAL)<<"Must set from/to " << std::endl; mytime.start(); FILE * f = fopen(dir.c_str(), "r"); if (f == NULL) logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl; while(true){ char buf[256]; int rc = fscanf(f, "%s\n", buf); if (rc < 1) break; in_files.push_back(buf); } if (in_files.size() == 0) logstream(LOG_FATAL)<<"Failed to read any file frommap from the list file: " << dir << std::endl; #pragma omp parallel for for (int i=0; i< (int)in_files.size(); i++) parse(i); std::cout << "Finished in " << mytime.current_time() << std::endl; save_map_to_text_file(frommap.string2nodeid, outdir + dir + "map.text"); return 0; }
int main(int argc, const char *argv[]) { Rcpp::Rcout<<"GraphChi parsers library is written by Danny Bickson (c). Send any " " comments or bug reports to [email protected] " << std::endl; global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); graphchi_init(argc, argv); debug = get_option_int("debug", 0); dir = get_option_string("file_list"); lines = get_option_int("lines", 0); omp_set_num_threads(get_option_int("ncpus", 1)); mytime.start(); FILE * f = fopen(dir.c_str(), "r"); if (f == NULL) logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl; while(true){ char buf[256]; int rc = fscanf(f, "%s\n", buf); if (rc < 1) break; in_files.push_back(buf); } if (in_files.size() == 0) logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl; //#pragma omp parallel for for (uint i=0; i< in_files.size(); i++) parse(i); std::cout << "Finished in " << mytime.current_time() << std::endl << "\t total lines in input file : " << total_lines << "\t max from: " << maxfrom << "\t max to: " <<maxto << std::endl; return 0; }
/*
 * Eccentricity estimation using per-vertex bit vectors.
 *
 * Steps visible in this fragment:
 *   1. Label connected components (CCBFS from the maximum-degree vertex, then
 *      Components for the rest) and sort vertices by component id.
 *   2. For every component with more than two vertices, run a multi-source
 *      traversal from up to 64*myLength randomly hashed start vertices, each
 *      source owning one bit in VisitedArray / NextVisitedArray; results go
 *      to ecc.
 *   3. For components with more than 1024 vertices, run a second traversal
 *      from the vertices with the largest first-phase values (results go to
 *      ecc2) and keep the element-wise maximum.
 *
 * Options read from P: "-r" (words per vertex) and "-out" (output file).
 *
 * NOTE(review): this fragment ends before the closing braces of the
 * per-component loop and of the function itself.
 */
void Compute(graph<vertex>& GA, commandLine P) {
  t5.start();
  long length = P.getOptionLongValue("-r",0); //number of words per vertex
  char* oFile = P.getOptionValue("-out"); //file to write eccentricites
  srand (time(NULL));
  uintT seed = rand();
  cout << "seed = " << seed << endl;
  t0.start();
  long n = GA.n;
  uintE* ecc = newA(uintE,n);
  uintE* ecc2 = newA(uintE,n);
  {parallel_for(long i=0;i<n;i++) {
      ecc[i] = ecc2[i] = 0;
    }}
  t0.stop();

  //BEGIN COMPUTE CONNECTED COMPONENTS
  t1.start();
  intE* Labels = newA(intE,n);
  {parallel_for(long i=0;i<n;i++) {
    if(GA.V[i].getOutDegree() == 0) Labels[i] = -i-1; //singletons
    else Labels[i] = INT_E_MAX;
    }}

  //get max degree vertex
  uintE maxV = sequence::reduce<uintE>((intE)0,(intE)n,maxF<intE>(),getDegree<vertex>(GA.V));

  //visit large component with BFS
  CCBFS(maxV,GA,Labels);
  //visit small components with label propagation
  Components(GA, Labels);

  //sort by component ID
  //negative labels are mapped back to non-negative ids via -Labels[i]-1
  intPair* CCpairs = newA(intPair,n);
  {parallel_for(long i=0;i<n;i++)
    if(Labels[i] < 0)
      CCpairs[i] = make_pair(-Labels[i]-1,i);
    else CCpairs[i] = make_pair(Labels[i],i);
  }
  free(Labels);

  intSort::iSort(CCpairs, n, n+1,firstF<uintE,uintE>());

  //changes[i] holds i where a new component id starts in the sorted order,
  //UINT_E_MAX elsewhere
  uintE* changes = newA(uintE,n);
  changes[0] = 0;
  {parallel_for(long i=1;i<n;i++)
      changes[i] = (CCpairs[i].first != CCpairs[i-1].first) ? i : UINT_E_MAX;}

  uintE* CCoffsets = newA(uintE,n);
  uintE numCC = sequence::filter(changes, CCoffsets, n, nonMaxF());
  //NOTE(review): CCoffsets has n entries; if numCC can equal n (every vertex
  //its own component) this sentinel write is one past the end -- confirm.
  CCoffsets[numCC] = n;
  free(changes);
  t1.stop();
  //END COMPUTE CONNECTED COMPONENTS

  //init data structures
  t0.start();
  //clamp the requested word count to the range [1, ceil(n/64)]
  length = max((long)1,min((n+63)/64,(long)length));
  long* VisitedArray = newA(long,n*length);
  long* NextVisitedArray = newA(long,n*length);
  int* flags = newA(int,n);
  {parallel_for(long i=0;i<n;i++) flags[i] = -1;}
  uintE* starts = newA(uintE,n);
  intPair* pairs = newA(intPair,n);
  t0.stop();

  //BEGIN COMPUTE ECCENTRICITES PER COMPONENT
  for(long k = 0; k < numCC; k++) {
    t2.start();
    uintE o = CCoffsets[k];
    uintE CCsize = CCoffsets[k+1] - o;
    if(CCsize == 2) { //size 2 CC's have ecc of 1
      ecc[CCpairs[o].second] = ecc[CCpairs[o+1].second] = 1;
      t2.stop();
    } else if(CCsize > 1) { //size 1 CC's already have ecc of 0
      //do main computation
      long myLength = min((long)length,((long)CCsize+63)/64);

      //initialize bit vectors for component vertices
      {parallel_for(long i=0;i<CCsize;i++) {
        uintT v = CCpairs[o+i].second;
        parallel_for(long j=0;j<myLength;j++)
          VisitedArray[v*myLength+j] = NextVisitedArray[v*myLength+j] = 0;
        }}

      long sampleSize = min((long)CCsize,(long)64*myLength);

      uintE* starts2 = newA(uintE,sampleSize);

      //pick random vertices (could have duplicates)
      //flags[index] is claimed with CAS so each hashed index is taken by at
      //most one sample slot; losers are marked UINT_E_MAX and filtered out
      {parallel_for(ulong i=0;i<sampleSize;i++) {
        uintT index = hashInt(i+seed) % CCsize;
        if(flags[index] == -1 && CAS(&flags[index],-1,(int)i)) {
          starts[i] = CCpairs[o+index].second;
          NextVisitedArray[CCpairs[o+index].second*myLength + i/64] = (long) 1<<(i%64);
        } else starts[i] = UINT_E_MAX;
        }}

      //remove duplicates
      uintE numUnique = sequence::filter(starts,starts2,sampleSize,nonMaxF());

      //reset flags
      parallel_for(ulong i=0;i<sampleSize;i++) {
        uintT index = hashInt(i+seed) % CCsize;
        if(flags[index] == i) flags[index] = -1;
      }

      //first phase
      vertexSubset Frontier(n,numUnique,starts2); //initial frontier
      //note: starts2 will be freed inside the following loop
      uintE round = 0;
      while(!Frontier.isEmpty()){
        round++;
        vertexMap(Frontier, Ecc_Vertex_F(myLength,VisitedArray,NextVisitedArray));
        vertexSubset output = edgeMap(GA, Frontier, Ecc_F(myLength,VisitedArray,NextVisitedArray,ecc,round), GA.m/20);
        Frontier.del();
        Frontier = output;
      }
      Frontier.del();
      t2.stop();
      //second phase if size of CC > 64
      //NOTE(review): the comment above says 64 but the test below uses 1024.
      if(CCsize > 1024) {
        //sort by ecc
        t3.start();
        {parallel_for(long i=0;i<CCsize;i++) {
            pairs[i] = make_pair(ecc[CCpairs[o+i].second],CCpairs[o+i].second);
          }}
        intPair maxR = sequence::reduce(pairs,CCsize,maxFirstF());
        intSort::iSort(pairs, CCsize, 1+maxR.first, firstF<uintE,uintE>());
        t3.stop();

        t4.start();
        //reset bit vectors for component vertices
        {parallel_for(long i=0;i<CCsize;i++) {
            uintT v = CCpairs[o+i].second;
            parallel_for(long j=0;j<myLength;j++)
              VisitedArray[v*myLength+j] = NextVisitedArray[v*myLength+j] = 0;
          }}

        starts2 = newA(uintE,sampleSize);
        //pick starting points with highest ecc ("fringe" vertices)
        {parallel_for(long i=0;i<sampleSize;i++) {
            intE v = pairs[CCsize-i-1].second;
            starts2[i] = v;
            NextVisitedArray[v*myLength + i/64] = (long) 1<<(i%64);
          }}

        vertexSubset Frontier2(n,sampleSize,starts2); //initial frontier
        //note: starts2 will be freed inside the following loop
        round = 0;
        while(!Frontier2.isEmpty()){
          round++;
          vertexMap(Frontier2, Ecc_Vertex_F(myLength,VisitedArray,NextVisitedArray));
          vertexSubset output = edgeMap(GA, Frontier2,Ecc_F(myLength,VisitedArray,NextVisitedArray,ecc2,round), GA.m/20);
          Frontier2.del();
          Frontier2 = output;
        }
        Frontier2.del();
        //keep the larger of the two phase results for every vertex
        {parallel_for(long i=0;i<n;i++) ecc[i] = max(ecc[i],ecc2[i]);}
        t4.stop();
      }
    }
int main(int argc, const char *argv[]) { Rcpp::Rcout<<"GraphChi parsers library is written by Danny Bickson (c). Send any " " comments or bug reports to [email protected] " << std::endl; global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); graphchi_init(argc, argv); debug = get_option_int("debug", 0); dir = get_option_string("file_list"); lines = get_option_int("lines", 0); omp_set_num_threads(get_option_int("ncpus", 1)); from_val = get_option_int("from_val", from_val); to_val = get_option_int("to_val", to_val); if (from_val == -1) logstream(LOG_FATAL)<<"Must set from/to " << std::endl; mytime.start(); FILE * f = fopen(dir.c_str(), "r"); if (f == NULL) logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl; while(true){ char buf[256]; int rc = fscanf(f, "%s\n", buf); if (rc < 1) break; in_files.push_back(buf); } if (in_files.size() == 0) logstream(LOG_FATAL)<<"Failed to read any file frommap from the list file: " << dir << std::endl; #pragma omp parallel for for (int i=0; i< (int)in_files.size(); i++) parse(i); std::cout << "Finished in " << mytime.current_time() << std::endl; int total_x =0 , total_y = 0; std::map<std::string, int>::iterator it; double h = 0; for (it = p_x.begin(); it != p_x.end(); it++){ total_x+= it->second; h-= (it->second / (double)n)*log2(it->second / (double)n); } for (it = p_y.begin(); it != p_y.end(); it++) total_y+= it->second; assert(total_x == n); assert(total_y == n); double mi = 0; std::map<std::string, uint>::iterator iter; assert(n != 0); int total_p_xy = 0; for (iter = frommap.string2nodeid.begin() ; iter != frommap.string2nodeid.end(); iter++){ double p_xy = iter->second / (double)n; assert(p_xy > 0); char buf[256]; strncpy(buf, iter->first.c_str(), 256); char * first = strtok(buf, "_"); char * second = strtok(NULL, "\n\r "); assert(first && second); double px = p_x[first] / (double)n; double py = p_y[second] / (double)n; assert(px > 0 && py > 0); mi += p_xy * log2(p_xy / (px * py)); total_p_xy 
+= iter->second; } assert(total_p_xy == n); logstream(LOG_INFO)<<"Total examples: " <<n << std::endl; logstream(LOG_INFO)<<"Unique p(x) " << p_x.size() << std::endl; logstream(LOG_INFO)<<"Unique p(y) " << p_y.size() << std::endl; logstream(LOG_INFO)<<"Average F(x) " << total_x / (double)p_x.size() << std::endl; logstream(LOG_INFO)<<"Average F(y) " << total_y / (double)p_y.size() << std::endl; std::cout<<"Mutual information of " << from_val << " [" << header_titles[from_val-1] << "] <-> " << to_val << " [" << header_titles[to_val-1] << "] is: " ; if (mi/h > 1e-3) std::cout<<std::setprecision(3) << mi << std::endl; else std::cout<<"-"<<std::endl; save_map_to_text_file(frommap.string2nodeid, outdir + dir + "map.text"); logstream(LOG_INFO)<<"Saving map file " << outdir << dir << "map.text" << std::endl; return 0; }
int main(int argc, const char ** argv) { mytimer.start(); print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("rating2"); knn_sample_percent = get_option_float("knn_sample_percent", 1.0); if (knn_sample_percent <= 0 || knn_sample_percent > 1) logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl; num_ratings = get_option_int("num_ratings", 10); if (num_ratings <= 0) logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl; debug = get_option_int("debug", 0); tokens_per_row = get_option_int("tokens_per_row", tokens_per_row); std::string algorithm = get_option_string("algorithm"); /* Basic arguments for RBM algorithm */ rbm_bins = get_option_int("rbm_bins", rbm_bins); rbm_scaling = get_option_float("rbm_scaling", rbm_scaling); if (algorithm == "svdpp" || algorithm == "svd++") algo = SVDPP; else if (algorithm == "biassgd") algo = BIASSGD; else if (algorithm == "rbm") algo = RBM; else logstream(LOG_FATAL)<<"--algorithm should be svd++ or biassgd or rbm"<<std::endl; parse_command_line_args(); /* Preprocess data if needed, or discover preprocess files */ int nshards = 0; if (tokens_per_row == 3) nshards = convert_matrixmarket<edge_data>(training, 0, 0, 3, TRAINING, false); else if (tokens_per_row == 4) nshards = convert_matrixmarket4<edge_data4>(training); else logstream(LOG_FATAL)<<"--tokens_per_row should be either 3 or 4" << std::endl; assert(M > 0 && N > 0); latent_factors_inmem.resize(M+N); // Initialize in-memory vertices. 
//initialize data structure to hold the matrix read from file if (algo == RBM){ #pragma omp parallel for for (uint i=0; i< M+N; i++){ if (i < M){ latent_factors_inmem[i].pvec = zeros(D*3); } else { latent_factors_inmem[i].pvec = zeros(rbm_bins + rbm_bins * D); } } } read_factors(training); if ((uint)num_ratings > N){ logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl; num_ratings = N; } srand(time(NULL)); /* Run */ if (tokens_per_row == 3){ RatingVerticesInMemProgram<VertexDataType, EdgeDataType> program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); set_engine_flags(engine); engine.run(program, 1); } else if (tokens_per_row == 4){ RatingVerticesInMemProgram<VertexDataType, edge_data4> program; graphchi_engine<VertexDataType, edge_data4> engine(training, nshards, false, m); set_engine_flags(engine); engine.run(program, 1); } /* Output latent factor matrices in matrix-market format */ output_knn_result(training); rating_stats(); if (users_without_ratings > 0) logstream(LOG_WARNING)<<"Found " << users_without_ratings << " without ratings. For those users no items are recommended (item id 0)" << std::endl; if (users_no_ratings > 0) logstream(LOG_WARNING)<<"Failed to compute ratings for " << users_no_ratings << " Users. For those users no items are recommended (item id 0)" << std::endl; /* Report execution metrics */ if (!quiet) metrics_report(m); return 0; }
/*
 * Eccentricity estimation from sampled breadth-first searches.
 *
 * Steps visible in this fragment:
 *   1. Label connected components (CCBFS from the maximum-degree vertex, then
 *      Components for the rest) and sort vertices by component id.
 *   2. For every component with more than two vertices:
 *        a. pick a sample of vertices and run one BFS per sample vertex,
 *        b. record, per vertex, its maximum distance to the sample,
 *        c. find the vertex w furthest from the sample and run a BFS from it,
 *           collecting the first nghSize visited vertices in Ngh_s,
 *        d. run one BFS per vertex in Ngh_s,
 *        e. combine the distances into ecc for every vertex that was not
 *           itself a BFS source,
 *        f. reset the shared distance arrays for the next component.
 *
 * Option read from P: "-out" (output file).
 *
 * NOTE(review): this fragment ends before the closing braces of the
 * per-component loop and of the function itself.
 */
void Compute(graph<vertex>& GA, commandLine P) {
  t10.start();
  char* oFile = P.getOptionValue("-out"); //file to write eccentricites
  srand (time(NULL));
  uintT seed = rand();
  cout << "seed = " << seed << endl;
  t0.start();
  long n = GA.n;
  //UINT_E_MAX marks "eccentricity not computed yet"
  uintE* ecc = newA(uintE,n);
  {parallel_for(long i=0;i<n;i++) ecc[i] = UINT_E_MAX;}
  t0.stop();

  //BEGIN COMPUTE CONNECTED COMPONENTS
  t1.start();
  intE* Labels = newA(intE,n);
  {parallel_for(long i=0;i<n;i++) {
    if(GA.V[i].getOutDegree() == 0) Labels[i] = -i-1; //singletons
    else Labels[i] = INT_E_MAX;
    }}

  //get max degree vertex
  uintE maxV = sequence::reduce<uintE>((intE)0,(intE)n,maxF<intE>(),getDegree<vertex>(GA.V));

  //visit large component with BFS
  CCBFS(maxV,GA,Labels);
  //visit small components with label propagation
  Components(GA, Labels);

  //sort by component ID
  //negative labels are mapped back to non-negative ids via -Labels[i]-1
  intPair* CCpairs = newA(intPair,n);
  {parallel_for(long i=0;i<n;i++)
    if(Labels[i] < 0)
      CCpairs[i] = make_pair(-Labels[i]-1,i);
    else CCpairs[i] = make_pair(Labels[i],i);
  }
  free(Labels);

  intSort::iSort(CCpairs, n, n+1, firstF<uintE,uintE>());

  //changes[i] holds i where a new component id starts in the sorted order,
  //UINT_E_MAX elsewhere
  uintE* changes = newA(uintE,n);
  changes[0] = 0;
  {parallel_for(long i=1;i<n;i++)
      changes[i] = (CCpairs[i].first != CCpairs[i-1].first) ? i : UINT_E_MAX;}

  uintE* CCoffsets = newA(uintE,n);
  uintE numCC = sequence::filter(changes, CCoffsets, n, nonMaxF());
  //NOTE(review): CCoffsets has n entries; if numCC can equal n (every vertex
  //its own component) this sentinel write is one past the end -- confirm.
  CCoffsets[numCC] = n;
  free(changes);
  t1.stop();
  //END COMPUTE CONNECTED COMPONENTS

  uintE maxS = min((uintE)n,(uintE)sqrt(n*log2(n)));
  uintE maxSampleSize = max((uintE)10,max((uintE)((n/maxS)*log2(n)),maxS));
  //data structures to be shared by all components
  //Dists[i] is row i of the flat maxSampleSize x n array Dist
  uintE** Dists = newA(uintE*,maxSampleSize);
  uintE* Dist = newA(uintE,maxSampleSize*n);

  {parallel_for(long i=0;i<maxSampleSize;i++) Dists[i] = Dist+i*n;}
  {parallel_for(long i=0;i<n*maxSampleSize;i++) Dist[i] = UINT_E_MAX;}

  intPair* wDist = newA(intPair,n);
  {parallel_for(long i=0;i<n;i++)
      wDist[i] = make_pair(UINT_E_MAX,UINT_E_MAX);}
  intPair* minDists = newA(intPair,n);

  uintE* starts = newA(uintE,n);
  uintE* starts2 = newA(uintE,n);
  uintE* maxDists = newA(uintE,n);

  //BEGIN COMPUTE ECCENTRICITES PER COMPONENT
  t4.start();
  for(long k = 0; k < numCC; k++) {
    uintE o = CCoffsets[k];
    uintE CCsize = CCoffsets[k+1] - o;
    if(CCsize == 1) ecc[CCpairs[o].second] = 0; //singletons have ecc of 0
    if(CCsize == 2) { //size 2 CC's have ecc of 1
      ecc[CCpairs[o].second] = ecc[CCpairs[o+1].second] = 1;
    } else if(CCsize > 1) {
      //do main computation
      t2.start();
      uintE s = min(CCsize,(uintE)sqrt(CCsize*log2(CCsize)));
      //pick sample of about \sqrt{n\log n} vertices
      long sampleSize = min(CCsize,max((uintE)10,(uintE)((CCsize/s)*log2(CCsize))));
      //pick random vertices
      {parallel_for(ulong i=0;i<CCsize;i++) {
          //pick with probability sampleSize/CCsize
          uintT index = hash(i+seed) % CCsize;
          if(index < sampleSize) starts[i] = CCpairs[o+i].second;
          else starts[i] = UINT_E_MAX;
        }}
      //pack down
      uintE numUnique = sequence::filter(starts,starts2,CCsize,nonMaxF());
      //sample cannot be empty!
      if(numUnique == 0) {
        starts2[0] = CCpairs[o+(hash(seed)%CCsize)].second;
        numUnique++;
      }
      if(numUnique > maxSampleSize) numUnique = maxSampleSize; //cap at maxSampleSize
      t2.stop();
      t3.start();
      //execute BFS per sample
      {for(long i=0;i<numUnique;i++) {
        uintE v = starts2[i];
        Dists[i][v] = 0; //set source dist to 0
        vertexSubset Frontier(n,v);
        uintE round = 0;
        while(!Frontier.isEmpty()){
          round++;
          vertexSubset output = edgeMap(GA, Frontier, BFS_F(Dists[i],round),GA.m/20);
          Frontier.del();
          Frontier = output;
        }
        Frontier.del();
        ecc[v] = round-1; //set radius for sample vertex
        }}
      t3.stop();
      t4.start();
      //store max distance from sample for each vertex so that we can
      //reuse Distance arrays
      {parallel_for(long i=0;i<CCsize;i++) {
        uintE v = CCpairs[o+i].second;
        //if not one of the vertices we did BFS on
        if(ecc[v] == UINT_E_MAX) {
          uintE max_from_sample = 0;
          //compute max distance from sampled vertex
          for(long j=0;j<numUnique;j++) {
            uintE d = Dists[j][v];
            if(d > max_from_sample) max_from_sample = d;
          }
          maxDists[i] = max_from_sample;
        }}}
      t4.stop();
      t5.start();
      //find furthest vertex from sample set S
      {parallel_for(long j=0;j<CCsize;j++) {
        uintE v = CCpairs[o+j].second;
        uintE m = UINT_E_MAX;
        for(long i=0;i<numUnique;i++) {
          uintE d = Dists[i][v];
          if(d < m) m = d;
          if(d == 0) break;
        }
        minDists[j] = make_pair(m,v);
        }}
      intPair furthest = sequence::reduce<intPair>(minDists,(intE)CCsize,maxFirstF());
      uintE w = furthest.second;
      t5.stop();
      t3.start();
      //reset Dist array entries
      {parallel_for(long i=0;i<numUnique;i++) {
        parallel_for(long j=0;j<CCsize;j++) {
          uintE v = CCpairs[o+j].second;
          Dists[i][v] = UINT_E_MAX;
        }
        }}
      t3.stop();
      t6.start();
      //execute BFS from w and find \sqrt{n log n} neighborhood of w
      uintE nghSize = min(CCsize,max((uintE)10,s));
      uintE* Ngh_s = starts; //reuse starts array
      bool filled_Ngh = 0;
      //stores distance from w and index of closest vertex in Ngh_s on
      //path from w to v
      wDist[w] = make_pair(0,0); //set source dist to 0
      vertexSubset Frontier(n,w);
      uintE round = 0;
      uintE numVisited = 0;
      while(!Frontier.isEmpty()){
        round++;
        if(!filled_Ngh) {
          Frontier.toSparse();
          //Note: if frontier size < nghSize - visited, there is non-determinism in which vertices
          //get added to Ngh_s as the ordering of vertices on the frontier is non-deterministic
          {parallel_for(long i=0;i<min(nghSize-numVisited,(uintE)Frontier.numNonzeros());i++) {
            Ngh_s[numVisited+i] = Frontier.s[i];
            wDist[Frontier.s[i]].second = numVisited+i;
            }
          numVisited += Frontier.numNonzeros();
          if(numVisited >= nghSize) filled_Ngh = 1;
          }}
        vertexSubset output = edgeMap(GA, Frontier, BFS_Pair_F(wDist,round),GA.m/20);
        Frontier.del();
        Frontier = output;
      }
      Frontier.del();
      ecc[w] = round-1; //set radius for w
      t6.stop();
      t7.start();
      //execute BFS from each vertex in neighborhood of w
      uintE** Dists2 = Dists; //reuse distance array
      //NOTE(review): Dist2 is not read anywhere in the visible fragment
      uintE* Dist2 = Dist;
      {for(long i=0;i<nghSize;i++) {
        uintE v = Ngh_s[i];
        Dists2[i][v] = 0; //set source dist to 0
        vertexSubset Frontier(n,v);
        uintE round = 0;
        while(!Frontier.isEmpty()){
          round++;
          vertexSubset output = edgeMap(GA, Frontier, BFS_F(Dists2[i],round),GA.m/20);
          Frontier.del();
          Frontier = output;
        }
        Frontier.del();
        ecc[v] = round-1; //set radius of vertex in Ngh_s
        }}
      t7.stop();
      t8.start();
      //min radius of sample
      //starts2 is overwritten in place with the radii of the sample vertices
      parallel_for(long i=0;i<numUnique;i++)
        starts2[i] = ecc[starts2[i]];
      uintE min_r_sample = sequence::reduce<uintE>(starts2,numUnique,minF<uintE>());
      //compute ecc values
      {parallel_for(long i=0;i<CCsize;i++) {
        uintE v = CCpairs[o+i].second;
        //if not one of the vertices we did BFS on
        if(ecc[v] == UINT_E_MAX) {
          uintE d_vw = wDist[v].first;
          uintE rv = max(maxDists[i],d_vw);
          //index in Ngh_s of closest vertex in Ngh_s on path from w to v
          uintE index_vt = wDist[v].second;
          uintE vt = Ngh_s[index_vt];
          uintE d_vt_v = Dists2[index_vt][v];
          uintE d_vt_w = Dists2[index_vt][w];
          if(d_vt_v <= d_vt_w) ecc[v] = max(rv,ecc[vt]);
          else ecc[v] = max(rv,min_r_sample);
        }
        }}
      t8.stop();
      t7.start();
      //reset Dist array entries
      {parallel_for(long i=0;i<nghSize;i++) {
        parallel_for(long j=0;j<CCsize;j++) {
          uintE v = CCpairs[o+j].second;
          Dists2[i][v] = UINT_E_MAX;
        }
        }}
      t7.stop();
      t6.start();
      //reset wDist array entries
      {parallel_for(long i=0;i<CCsize;i++) {
        uintE v = CCpairs[o+i].second;
        wDist[v] = make_pair(UINT_E_MAX,UINT_E_MAX);
        }}
      t6.stop();
    }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD); asym_cosine_alpha = get_option_float("asym_cosine_alpha", 0.5); debug = get_option_int("debug", debug); if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE && distance_metric != PROB) logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0= JACCARD, 1= AA, 2= RA, 3= ASYM_COSINE, 4 = PROB" << std::endl; parse_command_line_args(); mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false); if (nshards != 1) logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl; K = get_option_int("K", K); if (K <= 0) logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl; logstream(LOG_INFO) << "M = " << M << std::endl; assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. 
relevant_items = new bool[N]; //store node degrees in an array to be used for AA distance metric if (distance_metric == AA || distance_metric == RA || distance_metric == PROB) latent_factors_inmem.resize(M); if (distance_metric == PROB) prob_sim_normalization_constant = (double)L / (double)(M*N-L); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, 1, true, m); set_engine_flags(engine); engine.set_maxwindow(M+N+1); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl; if (not_enough) logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl; for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); delete[] relevant_items; /* write the matrix market info header to be used later */ FILE * pmm = fopen((training + "-topk:info").c_str(), "w"); if (pmm == NULL) logstream(LOG_FATAL)<<"Failed to open " << training << ":info to file" << std::endl; fprintf(pmm, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(pmm, "%u %u %u\n", N, N, (unsigned int)sum(written_pairs)); fclose(pmm); /* sort output files */ logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl; std::string dname= dirname(strdup(argv[0])); system(("bash " + dname + "/topk.sh " + std::string(basename(strdup(training.c_str())))).c_str()); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("itemsim2rating2"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); debug = get_option_int("debug", 0); parse_command_line_args(); std::string similarity = get_option_string("similarity", ""); if (similarity == "") logstream(LOG_FATAL)<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl; undirected = get_option_int("undirected", 0); mytimer.start(); int nshards = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, °rees); assert(M > 0 && N > 0); prob_sim_normalization_constant = (double)L / (double)(M*N-L); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. 
relevant_items = new bool[N]; /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); set_engine_flags(engine); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s-rec.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } K = get_option_int("K"); assert(K > 0); //run the program engine.run(program, niters); for (uint i=0; i< out_files.size(); i++) fclose(out_files[i]); delete[] relevant_items; /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << std::endl; logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl; std::string dname= dirname(strdup(argv[0])); system(("bash " + dname + "/topk.sh " + std::string(basename(strdup((training+"-rec").c_str())))).c_str()); return 0; }
int main(int argc, const char ** argv) { print_copyright(); /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("item-cf"); /* Basic arguments for application */ min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection); distance_metric = get_option_int("distance", JACCARD); asym_cosine_alpha = get_option_float("asym_cosine_alpha", 0.5); if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE) logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACCARD, 1) AA, 2) RA, 3) ASYM_COSINE" << std::endl; parse_command_line_args(); mytimer.start(); int nshards = convert_matrixmarket<EdgeDataType>(training/*, orderByDegreePreprocessor*/); if (nshards != 1) logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl; K = get_option_int("K", K); if (K <= 0) logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl; assert(M > 0 && N > 0); //initialize data structure which saves a subset of the items (pivots) in memory adjcontainer = new adjlist_container(); //array for marking which items are conected to the pivot items via users. 
relevant_items = new bool[N]; //store node degrees in an array to be used for AA distance metric if (distance_metric == AA || distance_metric == RA) latent_factors_inmem.resize(M); /* Run */ ItemDistanceProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); set_engine_flags(engine); engine.set_maxwindow(M+N+1); //open output files as the number of operating threads out_files.resize(number_of_omp_threads()); for (uint i=0; i< out_files.size(); i++){ char buf[256]; sprintf(buf, "%s.out%d", training.c_str(), i); out_files[i] = open_file(buf, "w"); } //run the program engine.run(program, niters); /* Report execution metrics */ if (!quiet) metrics_report(m); std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl; if (not_enough) logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl; for (uint i=0; i< out_files.size(); i++){ fflush(out_files[i]); fclose(out_files[i]); } std::cout<<"Created " << number_of_omp_threads() << " output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; delete[] relevant_items; return 0; }
// Computes a set cover of GS bucket by bucket.
//
// The sets are first distributed into buckets by putInBuckets (epsilon = 0.01).
// Buckets are then visited from the highest index to the lowest. In every
// round the sets left over from earlier rounds are re-packed: those with
// 0 < degree < threshold are kept for later rounds, those with
// degree >= threshold are processed now together with the sets of the current
// bucket. processBucket is run on the sets of the round, and every set it
// leaves with a negative degree is recorded (by its id) as part of the cover.
//
// Returns a _seq holding the ids of the chosen sets; the underlying array is
// allocated here and handed to the returned _seq.
_seq<intT> setCover(Graph GS) {
  double epsilon = 0.01;
  intT m = maxElt(GS);
  cout << "m = " << m << endl;

  bucketTime.start();
  pair<bucket*, int> bucketed = putInBuckets(GS, epsilon);
  bucketTime.stop();
  bucket* buckets = bucketed.first;
  int bucketCount = bucketed.second;

  set* live = newA(set, GS.n);     // sets carried between rounds
  set* scratch = newA(set, GS.n);  // pack target (pack is not in place)
  int liveCount = 0;               // number of valid entries in live
  bool* keep = newA(bool, GS.n);
  intT* cover = newA(intT, GS.n);
  intT coverSize = 0;
  intT workSum = 0;
  intT* elts = newA(intT,m);
  intT threshold = GS.n;
  for (int e = 0; e < m; e++)
    elts[e] = INT_MAX;

  // visit the buckets from the largest degree down
  for (int i = bucketCount-1; i >= 0; i--) {
    bucket current = buckets[i];
    intT roundThreshold = ceil(pow(1.0+epsilon,i));

    // nothing new in this bucket and the threshold did not move: skip
    if (roundThreshold == threshold && current.n == 0)
      continue;
    threshold = roundThreshold;

    packTime.start();

    // leftovers below the threshold go to the front, for later rounds
    for (int j = 0; j < liveCount; j++)
      keep[j] = (live[j].degree > 0 && live[j].degree < threshold);
    intT belowCount = sequence::pack(live, scratch, keep, liveCount);

    // leftovers at or above the threshold follow, for this round
    for (int j = 0; j < liveCount; j++)
      keep[j] = (live[j].degree >= threshold);
    intT roundCount = sequence::pack(live, scratch+belowCount, keep, liveCount);

    // the pre-bucketed sets of bucket i are appended, also for this round
    for (int j = 0; j < current.n; j++)
      scratch[j+belowCount+roundCount] = current.S[j];
    roundCount += current.n;             // sets handled in this round
    liveCount = belowCount + roundCount; // including those kept for later

    swap(scratch,live);                  // scratch now holds the stale copy
    set* roundSets = live + belowCount;  // first set of this round
    packTime.stop();

    if (roundCount <= 0)
      continue;  // nothing to process in this round

    manisTime.start();
    intT work = processBucket(roundSets, elts, roundCount, threshold);
    workSum += work;
    manisTime.stop();

    packTime.start();
    // a negative degree marks a set that was selected for the cover
    for (int j = 0; j < roundCount; j++)
      keep[j] = roundSets[j].degree < 0;

    // collect the selected positions, then translate them to original ids
    int added = sequence::packIndex(cover+coverSize, keep, roundCount);
    for (int j = coverSize; j < coverSize + added; j++)
      cover[j] = roundSets[cover[j]].id;
    coverSize += added;
    packTime.stop();

    cout << "i = " << i << " bc = " << current.n << " l = " << liveCount
         << " lb = " << roundCount << " work = " << work
         << " new = " << added << " threshold = " << threshold << endl;
  }

  cout << "Set cover size = " << coverSize << endl;
  cout << "Total work = " << workSum << endl;
  cout << "Bucket Time = " << bucketTime.total() << endl;
  cout << "Manis Time = " << manisTime.total() << endl;
  cout << "Pack Time = " << packTime.total() << endl;

  free(elts);
  free(live);
  free(scratch);
  free(keep);
  freeBuckets(buckets);
  return _seq<intT>(cover, coverSize);
}
int main(int argc, const char ** argv) { mytimer.start(); logstream(LOG_WARNING)<<"GraphChi Collaborative filtering library is written by Danny Bickson (c). Send any " " comments or bug reports to [email protected] " << std::endl; /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("nmf-inmemory-factors"); /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */ training = get_option_string("training"); // Base filename validation = get_option_string("validation", ""); test = get_option_string("test", ""); knn_sample_percent = get_option_float("knn_sample_percent", 1.0); if (knn_sample_percent <= 0 || knn_sample_percent > 1) logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl; if (validation == "") validation += training + "e"; if (test == "") test += training + "t"; maxval = get_option_float("maxval", 1e100); minval = get_option_float("minval", -1e100); bool quiet = get_option_int("quiet", 0); num_ratings = get_option_int("num_ratings", 10); if (num_ratings <= 0) logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl; debug = get_option_int("debug", 0); if (quiet) global_logger().set_log_level(LOG_ERROR); bool scheduler = false; // Selective scheduling not supported for now. /* Preprocess data if needed, or discover preprocess files */ int nshards = convert_matrixmarket<float>(training); assert(M > 0 && N > 0); latent_factors_inmem.resize(M+N); // Initialize in-memory vertices. 
max_left_vertex = M-1; max_right_vertex = M+N-1; read_factors<vertex_data>(training + "_U.mm", true); read_factors<vertex_data>(training + "_V.mm", false); if ((uint)num_ratings > N){ logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl; num_ratings = N; } srand(time(NULL)); /* Run */ RatingVerticesInMemProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, scheduler, m); engine.set_modifies_inedges(false); engine.set_modifies_outedges(false); engine.set_disable_vertexdata_storage(); pengine = &engine; engine.run(program, 1); m.set("latent_dimension", (int)D); /* Output latent factor matrices in matrix-market format */ vid_t numvertices = engine.num_vertices(); assert(numvertices == max_right_vertex + 1); // Sanity check output_knn_result(training, numvertices, max_left_vertex); rating_stats(); /* Report execution metrics */ metrics_report(m); return 0; }
int main(int argc, const char *argv[]) { logstream(LOG_WARNING)<<"GraphChi parsers library is written by Danny Bickson (c). Send any " " comments or bug reports to [email protected] " << std::endl; global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); graphchi_init(argc, argv); mytimer.start(); outdir = get_option_string("output",""); debug = get_option_int("debug", 0); dir = get_option_string("file_list",""); filename = get_option_string("training",""); lines = get_option_int("lines", 0); omp_set_num_threads(get_option_int("ncpus", 1)); tsv = get_option_int("tsv", 0); //is this tab seperated file? csv = get_option_int("csv", 0); // is the comma seperated file? binary = get_option_int("binary", 0); single_domain = get_option_int("single_domain", 0); has_header_titles = get_option_int("has_header_titles", has_header_titles); ignore_rest_of_line = get_option_int("ignore_rest_of_line", ignore_rest_of_line); mytime.start(); string_to_tokenize = spaces; if (tsv) string_to_tokenize = tsv_spaces; else if (csv) string_to_tokenize = csv_spaces; if (dir != ""){ FILE * f = fopen(dir.c_str(), "r"); if (f == NULL) logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl; while(true){ char buf[256]; int rc = fscanf(f, "%s\n", buf); if (rc < 1) break; in_files.push_back(buf); } } else if (filename != "") in_files.push_back(filename); if (in_files.size() == 0) logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl; #pragma omp parallel for for (uint i=0; i< in_files.size(); i++) parse(i); std::cout << "Finished in " << mytime.current_time() << std::endl; M = string2nodeid.size(); if (single_domain) N = M; else N = string2nodeid2.size(); save_map_to_text_file(string2nodeid, outdir + ".user.map"); if (!single_domain){ save_map_to_text_file(string2nodeid2, outdir + ".item.map"); } std::string filename = "matrix_market.info"; if (in_files.size() == 1) filename = in_files[0] + ".out:info"; 
logstream(LOG_INFO)<<"Writing matrix market header into file: " << filename << std::endl; out_file fout(filename.c_str()); MM_typecode out_typecode; mm_clear_typecode(&out_typecode); mm_set_integer(&out_typecode); mm_set_sparse(&out_typecode); mm_set_matrix(&out_typecode); mm_write_banner(fout.outf, out_typecode); mm_write_mtx_crd_size(fout.outf, M, N, nnz); return 0; }
void Compute(graph<vertex>& GA, commandLine P) { t1.start(); long start = P.getOptionLongValue("-r",0); if(GA.V[start].getOutDegree() == 0) { cout << "starting vertex has degree 0" << endl; return; } const int procs = P.getOptionIntValue("-p",0); if(procs > 0) setWorkers(procs); const double t = P.getOptionDoubleValue("-t",3); const double epsilon = P.getOptionDoubleValue("-e",0.000000001); const uintE N = P.getOptionIntValue("-N",1); const intE n = GA.n; const double constant = exp(t)*epsilon/(2*(double)N); double* psis = newA(double,N); double* fact = newA(double,N); fact[0] = 1; for(long k=1;k<N;k++) fact[k] = k*fact[k-1]; double* tm = newA(double,N); {parallel_for(long m=0;m<N;m++) tm[m] = pow(t,m);} {parallel_for(long k=0;k<N;k++) { psis[k] = 0; for(long m=0;m<N-k;m++) psis[k] += fact[k]*tm[m]/(double)fact[m+k]; }} sparseAdditiveSet<float> x = sparseAdditiveSet<float>(10000,1,0.0); sparseAdditiveSet<float> r = sparseAdditiveSet<float>(2,1,0.0); x.insert(make_pair(start,0.0)); r.insert(make_pair(start,1.0)); vertexSubset Frontier(n,start); long j = 0, totalPushes = 0; while(Frontier.numNonzeros() > 0){ totalPushes += Frontier.numNonzeros(); uintT* Degrees = newA(uintT,Frontier.numNonzeros()); {parallel_for(long i=0;i<Frontier.numNonzeros();i++) Degrees[i] = GA.V[Frontier.s[i]].getOutDegree();} long totalDegree = sequence::plusReduce(Degrees,Frontier.numNonzeros()); free(Degrees); if(j+1 < N) { long rCount = r.count(); //make bigger hash table initialized to 0.0's sparseAdditiveSet<float> new_r = sparseAdditiveSet<float>(max(100L,min((long)n,totalDegree+rCount)),LOAD_FACTOR,0.0); vertexMap(Frontier,Local_Update(x,r)); vertexSubset output = edgeMap(GA, Frontier, HK_F<vertex>(x,r,new_r,GA.V,t/(double)(j+1))); r.del(); r = new_r; if(x.m < ((uintT) 1 << log2RoundUp((uintT)(LOAD_FACTOR*min((long)n,rCount+output.numNonzeros()))))) { sparseAdditiveSet<float> new_x = sparseAdditiveSet<float>(LOAD_FACTOR*min((long)n,rCount+output.numNonzeros()),LOAD_FACTOR,0.0); //make 
bigger hash table new_x.copy(x); x.del(); x = new_x; } output.del(); //compute active set (faster in practice to just scan over r) _seq<ACLpair> vals = r.entries(activeF<vertex>(GA.V,constant/psis[j+1])); uintE* Active = newA(uintE,vals.n); parallel_for(long i=0;i<vals.n;i++) Active[i] = vals.A[i].first; Frontier.del(); vals.del(); Frontier = vertexSubset(n,vals.n,Active); j++; } else { //last iteration
int main(int argc, const char ** argv) { /* GraphChi initialization will read the command line arguments and the configuration file. */ graphchi_init(argc, argv); /* Metrics object for keeping track of performance counters and other information. Currently required. */ metrics m("connected-components-inmem"); /* Basic arguments for application */ std::string filename = get_option_string("file"); // Base filename int niters = get_option_int("niters", 100); // Number of iterations (max) int output_labels = get_option_int("output_labels", 0); //output node labels to file? bool scheduler = true; // Always run with scheduler /* Process input file - if not already preprocessed */ float p = get_option_float("p", -1); int n = get_option_int("n", -1); int quiet = get_option_int("quiet", 0); if (quiet) global_logger().set_log_level(LOG_ERROR); int nshards = (int) convert_if_notexists<EdgeDataType>(filename, get_option_string("nshards", "auto")); mytimer.start(); /* Run */ ConnectedComponentsProgram program; graphchi_engine<VertexDataType, EdgeDataType> engine(filename, nshards, scheduler, m); engine.set_disable_vertexdata_storage(); engine.set_enable_deterministic_parallelism(false); engine.set_modifies_inedges(false); engine.set_modifies_outedges(false); engine.set_preload_commit(false); engine.set_maxwindow(engine.num_vertices()); mytimer.start(); active_nodes = new bool[engine.num_vertices()]; for (int i=0; i< engine.num_vertices(); i++) active_nodes[i] = true; engine.run(program, niters); /* Run analysis of the connected components (output is written to a file) */ if (output_labels){ FILE * pfile = fopen((filename + "-components").c_str(), "w"); if (!pfile) logstream(LOG_FATAL)<<"Failed to open file: " << filename << std::endl; fprintf(pfile, "%%%%MatrixMarket matrix array real general\n"); fprintf(pfile, "%lu %u\n", engine.num_vertices()-1, 1); for (uint i=1; i< engine.num_vertices(); i++){ fprintf(pfile, "%u\n", vertex_values[i]); assert(vertex_values[i] >= 0 && 
vertex_values[i] < engine.num_vertices()); } fclose(pfile); logstream(LOG_INFO)<<"Saved succesfully to out file: " << filename << "-components" << " time for saving: " << mytimer.current_time() << std::endl; } std::cout<<"Total runtime: " << mytimer.current_time() << std::endl; if (p > 0) std::cout << "site fraction p= " << p << std::endl; if (n > 0){ std::cout << "n=" << n*p << std::endl; std::cout << "isolated sites: " << p*(double)n-actual_vertices << std::endl; } std::cout << "Number of sites: " << actual_vertices << std::endl; std::cout << "Number of bonds: " << engine.num_edges() << std::endl; if (n){ std::cout << "Percentage of sites: " << (double)actual_vertices / (double)n << std::endl; std::cout << "Percentage of bonds: " << (double)engine.num_edges() / (2.0*n) << std::endl; } std::cout << "Number of iterations: " << iter << std::endl; std::cout << "SITES RESULT:\nsize\tcount\n"; std::map<uint,uint> final_countsv; std::map<uint,uint> final_countse; std::map<uint,uint> statv; for (int i=0; i< engine.num_vertices(); i++) statv[vertex_values[i]]++; uint total_sites = 0; for (std::map<uint, uint>::const_iterator iter = statv.begin(); iter != statv.end(); iter++) { //std::cout << iter->first << "\t" << iter->second << "\n"; final_countsv[iter->second] += 1; total_sites += iter->second; } for (std::map<uint, uint>::const_iterator iter = final_countsv.begin(); iter != final_countsv.end(); iter++) { std::cout << iter->first << "\t" << iter->second << "\n"; } edge_count = 1; engine.run(program, 1); std::cout << "BONDS RESULT:\nsize\tcount\n"; uint total_bonds = 0; for (std::map<uint, uint>::const_iterator iter = state.begin(); iter != state.end(); iter++) { //std::cout << iter->first << "\t" << iter->second << "\n"; final_countse[iter->second] += 1; total_bonds += iter->second; } for (std::map<uint, uint>::const_iterator iter = final_countse.begin(); iter != final_countse.end(); iter++) { std::cout << iter->first << "\t" << iter->second << "\n"; } 
assert(total_sites == graph.num_vertices()); assert(total_bonds == graph.num_edges()); return 0; }
/* The main drawing function. */ void DrawGLScene(void) { const_move(delta.get_ticks()); delta.start(); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer glLoadIdentity(); // Reset The View glTranslatef(-(gridsize/2) +1, 0, -30); //glRotatef(zrot, 0.0f, 0.0f, 1.0f); // choose the texture to use glBindTexture(GL_TEXTURE_2D, texture[0]); //glLoadIdentity(); for(float _xtrans_l =0; _xtrans_l < gridsize; _xtrans_l+=2) { for(float _ztrans_l =0; _ztrans_l < gridsize; _ztrans_l+=2) { //glPushMatrix(); glDisable(GL_TEXTURE_2D); glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glColor3ub(144, 213, 225); glLineWidth(1); glPushMatrix(); // rotate triangle on y axis /**glRotatef(xrot, 0.0f, 1.0f, 0.0f);*/ //glRotatef(0, 0.0f, 0.0f, 0.0f); glTranslatef(_xtrans_l, _ytrans, _ztrans_l); if(_xtrans_l != gridsize) { glBegin(GL_LINES); glVertex3f(-1.0f, 0.0f, -1.0f); glVertex3f(1.0f, 0.0f, -1.0f); glEnd(); glBegin(GL_LINES); glVertex3f(-1.0f, 0.0f, 1.0f); glVertex3f(1.0f, 0.0f, 1.0f); glEnd(); } // { // glBegin(GL_LINES); // glVertex3f(0.0f, 0.0f, 0.0f); // glVertex3f(2.0f, 0.0f, 0.0f); // glEnd(); // } if(_ztrans_l != gridsize) { glBegin(GL_LINES); glVertex3f(-1.0f, 0.0f, -1.0f); glVertex3f(-1.0f, 0.0f, 1.0f); glEnd(); glBegin(GL_LINES); glVertex3f(1.0f, 0.0f, -1.0f); glVertex3f(1.0f, 0.0f, 1.0f); glEnd(); } // { // glBegin(GL_LINES); // glVertex3f(0.0f, 0.0f, 0.0f); // glVertex3f(0.0f, 0.0f, 2.0f); // glEnd(); // } glPopMatrix(); } } glPushMatrix(); glTranslatef(_xtrans, _ytrans, _ztrans); if(dir_array[0]) { //up glRotatef(90, 0, 1, 0); } else if(dir_array[1]) { //right glRotatef(0, 0, 1, 0); } else if(dir_array[2]) { //down glRotatef(-90, 0, 1, 0); } else if(dir_array[3]) { //left glRotatef(180, 0, 1, 0); } //draw cube for player palceholder glBegin(GL_QUADS); //bottom colour glColor3ub(144, 213, 225); //bottom glVertex3f(-1.0f, 0.0f, -1.0f); glVertex3f(1.0f, 0.0f, -1.0f); glVertex3f(1.0f, 0.0f, 1.0f); 
glVertex3f(-1.0f, 0.0f, 1.0f); //left colour glColor3ub(144, 213, 225); //left glVertex3f(-1.0f, 0.0f, -1.0f); glVertex3f(-1.0f, 0.0f, 1.0f); glVertex3f(-1.0f, 2.0f, 1.0f); glVertex3f(-1.0f, 2.0f, -1.0f); //top colour glColor3ub(144, 213, 225); //top glVertex3f(-1.0f, 2.0f, -1.0f); glVertex3f(-1.0f, 2.0f, 1.0f); glVertex3f(1.0f, 2.0f, 1.0f); glVertex3f(1.0f, 2.0f, -1.0f); //right colour glColor3ub(144, 213, 225); //right bottom glVertex3f(1.0f, 0.0f, -1.0f); glVertex3f(2.0f, 1.0f, -1.0f); glVertex3f(2.0f, 1.0f, 1.0f); glVertex3f(1.0f, 0.0f, 1.0f); //right colour glColor3ub(144, 213, 225); // right top glVertex3f(1.0f, 2.0f, -1.0f); glVertex3f(2.0f, 1.0f, -1.0f); glVertex3f(2.0f, 1.0f, 1.0f); glVertex3f(1.0f, 2.0f, 1.0f); //front colour glColor3ub(144, 213, 225); //front glVertex3f(-1.0f, 0.0f, 1.0f); glVertex3f(1.0f, 0.0f, 1.0f); glVertex3f(1.0f, 2.0f, 1.0f); glVertex3f(-1.0f, 2.0f, 0.0f); //back colour glColor3ub(144, 213, 225); //back glVertex3f(-1.0f, 0.0f, -1.0f); glVertex3f(-1.0f, 2.0f, -1.0f); glVertex3f(1.0f, 2.0f, -1.0f); glVertex3f(1.0f, 0.0f, -1.0f); glEnd(); glPopMatrix(); //push mid points of top and bottom of cube into vector // top // int vertexarray[12] = {}; // trail_vector.push_back(); glDisable(GL_BLEND); glEnable(GL_TEXTURE_2D); //count = 0; float _xtrans_int = _xtrans; if( _xtrans_int > 2) { count ++; _xtrans_int = 0; } std::cout << _xtrans_int << std::endl; for(int i = 0; i < count; i++) { glPushMatrix(); drawtrail(count); glPopMatrix(); } // since this is double buffered, swap the buffers to display what just got drawn. glutSwapBuffers(); }