/**
 * Compute eigenvalues (no eigenvectors) of a generalized symmetric
 * eigenproblem by calling LAPACKE_dsygvx with itype = 1 and jobz = 'N'.
 *
 * @param mata    first matrix passed to dsygvx (passed as a mutable array)
 * @param matb    second matrix passed to dsygvx (passed as a mutable array)
 * @param eigvals output buffer that dsygvx fills with the eigenvalues found
 * @param params  solver options: "abstol", "verbose", plus whatever
 *                get_matrix_part() and get_eigenvalues_range() read
 * @param timer   timer used to record the diagonalize and finalize phases
 * @return the LAPACKE info code; a non-zero code never reaches the caller
 *         because the function prints a diagnostic and calls exit(1)
 */
int diagonalize_bisection(localized_matrix<double, MATRIX_MAJOR>& mata, localized_matrix<double, MATRIX_MAJOR>& matb,
			  double* eigvals,
			  rokko::parameters const& params, timer& timer) {
  rokko::parameters params_out;
  char jobz = 'N';  // only eigenvalues
  int dim = mata.innerSize();
  int lda = mata.outerSize();
  int ldb = matb.outerSize();
  lapack_int m;  // output: found eigenvalues

  // Start from a defined value: the sign check below must never read an
  // indeterminate double when "abstol" is absent or cannot be fetched.
  double abstol = 0.;
  if (params.defined("abstol")) {
    get_key(params, "abstol", abstol);
    if (abstol < 0) {
      std::cerr << "Error in diagonalize_bisection" << std::endl
	        << "abstol is negative value, which means QR method." << std::endl
	        << "To use dsygvx as bisection solver, set abstol a positive value" << std::endl;
      // NOTE(review): a bare `throw;` with no exception in flight ends the
      // program through std::terminate; kept as-is so the failure mode seen
      // by existing callers does not change.
      throw;
    }
  } else {  // default: optimal value for bisection method
    abstol = 2 * LAPACKE_dlamch('S');
  }
  params_out.set("abstol", abstol);
  char uplow = get_matrix_part(params);

  lapack_int il, iu;
  double vl, vu;
  char range = get_eigenvalues_range(params, vl, vu, il, iu);

  std::vector<lapack_int> ifail(dim);
  timer.start(timer_id::diagonalize_diagonalize);
  // The two storage orders differ only in the layout flag handed to LAPACKE.
  const int layout = mata.is_col_major() ? LAPACK_COL_MAJOR : LAPACK_ROW_MAJOR;
  int info = LAPACKE_dsygvx(layout, 1, jobz, range, uplow, dim,
			    &mata(0,0), lda, &matb(0,0), ldb, vl, vu, il, iu,
			    abstol, &m, eigvals, NULL, lda, &ifail[0]);
  timer.stop(timer_id::diagonalize_diagonalize);
  timer.start(timer_id::diagonalize_finalize);
  if (info) {
    std::cerr << "error at dsygvx function. info=" << info << std::endl;
    if (info < 0) {
      std::cerr << "This means that ";
      std::cerr << "the " << abs(info) << "-th argument had an illegal value." << std::endl;
    }
    exit(1);
  }
  params_out.set("m", m);
  params_out.set("ifail", ifail);
  
  if (params.get_bool("verbose")) {
    print_verbose("dsygvx (bisection)", jobz, range, uplow, vl, vu, il, iu, params_out);
  }
  timer.stop(timer_id::diagonalize_finalize);
  return info;
}
Example #2
0
bool ripng_router::check_startup() {
	if (!router::check_startup())
		return false;

	int sock = socket(PF_INET6, SOCK_DGRAM, 0);
	if (sock < 0)
		return false;

	sockaddr_in6 local;
	memset(&local, 0, sizeof(local));
	local.sin6_family = AF_INET6;
	local.sin6_port = htons(522);

	if (bind(sock, (sockaddr *)&local, sizeof(local)) < 0) {
		if (should_log(WARNING))
			log().perror("Failed to bind");
		close(sock);
		return false;
	}

	if (!m_sock.register_fd(sock)) {
		close(sock);
		return false;
	}

	if (!m_sock.enable_mc_loop(false))
		return false;

	g_mrd->mrib().install_listener(this);

	m_garbcol_timer.start();

	return true;
}
Example #3
0
/**
 * Entry point of the item-cf2 application: reads the options, converts the
 * training file into shards, runs ItemDistanceProgram on the engine and
 * writes one output file per OpenMP thread ("<training>.out<thread>").
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to CE_Graph_init
 * @return 0 on completion
 */
int main(int argc, const char ** argv) {
  print_copyright();

  /* CE_Graph initialization will read the command line 
     arguments and the configuration file. */
  CE_Graph_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf2");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);

  distance_metric          = get_option_int("distance", JACCARD_WEIGHT);
  // The option is read under the name "distance", so the message names that flag.
  if (distance_metric != JACCARD_WEIGHT)
    logstream(LOG_FATAL)<<"--distance=XX should be one of:9= JACCARD_WEIGHT" << std::endl;
  debug                    = get_option_int("debug", 0);
  parse_command_line_args();

  //if (distance_metric != JACKARD && distance_metric != AA && distance_metric != RA)
  //  logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0) JACKARD, 1) AA, 2) RA" << std::endl;  

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, true);

  assert(M > 0 && N > 0);

  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();

  /* Run */
  ItemDistanceProgram program;
  CE_Graph_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); 
  set_engine_flags(engine);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    // Bounded write: a long training path must not overrun buf.
    snprintf(buf, sizeof(buf), "%s.out%d", training.c_str(), (int)i);
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl;

  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  std::cout<<"Created output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; 

  return 0;
}
/**
 * Entry point of the itemsim2rating2 application: reads the options, converts
 * the training file together with an item-similarity file into shards, runs
 * ItemDistanceProgram on the engine and writes results to "<training>-rec".
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init
 * @return 0 on completion, 1 when no similarity file was given
 */
int main(int argc, const char ** argv) {
    print_copyright();

    /* GraphChi initialization will read the command line
       arguments and the configuration file. */
    graphchi_init(argc, argv);

    /* Metrics object for keeping track of performance counters
       and other information. Currently required. */
    metrics m("itemsim2rating2");

    /* Basic arguments for application */
    min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
    debug                    = get_option_int("debug", 0);
    parse_command_line_args();
    std::string similarity   = get_option_string("similarity", "");
    if (similarity == "") {
        Rcpp::Rcerr<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl;
        // Nothing useful can be computed without the similarity file, so stop
        // here instead of handing an empty filename to the converter.
        return 1;
    }
    undirected               = get_option_int("undirected", 1);
    Q                        = get_option_float("Q", Q);
    K 			   = get_option_int("K");

    mytimer.start();
    vec unused;
    int nshards          = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, unused);

    assert(M > 0 && N > 0);

    //initialize data structure which saves a subset of the items (pivots) in memory
    adjcontainer = new adjlist_container();

    //array for marking which items are connected to the pivot items via users.
    relevant_items = new bool[N];

    /* Run */
    ItemDistanceProgram program;
    graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m);
    set_engine_flags(engine);

    out_file = open_file((training + "-rec").c_str(), "w");

    //run the program
    engine.run(program, niters);

    /* Report execution metrics */
    if (!quiet)
        metrics_report(m);

    Rcpp::Rcout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << written_pairs << std::endl;

    if (zero_edges)
        Rcpp::Rcout<<"Found: " << zero_edges<< " user edges with weight zero. Those are ignored." <<std::endl;

    delete[] relevant_items;
    fclose(out_file);
    return 0;
}
Example #5
0
/**
 * Compile one SQL statement, step it until it stops yielding rows, finalize
 * it, and time the whole sequence with the supplied timer.
 */
void prepareAndRun(sqlite3 *db, string stmt, timer &t) {
	sqlite3_stmt *compiled;

	t.start();
	checkErr(sqlite3_prepare_v2(db, stmt.c_str(), -1, &compiled, NULL), __LINE__, db, stmt);
	for (;;) { if (checkErr(sqlite3_step(compiled), __LINE__, db, stmt) != SQLITE_ROW) break; }
	checkErr(sqlite3_finalize(compiled), __LINE__, db, stmt);
	t.end();
}
/**
 * Sample N random walks from a start vertex, where a walk of length k is
 * chosen with probability exp(-t) * t^k / k! (k < K), build the empirical
 * distribution of the walk end points, run sweepCut over it and print the
 * resulting statistics.
 *
 * Options read from P: -r start vertex (default 0), -K number of walk-length
 * probabilities (default 10), -N number of walks (default 10), -t (default 3).
 *
 * @param GA input graph
 * @param P  parsed command line
 */
void Compute(graph<vertex>& GA, commandLine P) {
  t1.start();
  long start = P.getOptionLongValue("-r",0);
  if(GA.V[start].getOutDegree() == 0) { 
    cout << "starting vertex has degree 0" << endl;
    return;
  }
  const uintE K = P.getOptionIntValue("-K",10);
  const uintE N = P.getOptionIntValue("-N",10);
  const double t = P.getOptionDoubleValue("-t",3);
  srand (time(NULL));
  uintE seed = rand();

  //walk length probabilities
  double* fact = newA(double,K);
  fact[0] = 1;
  for(long k=1;k<K;k++) fact[k] = k*fact[k-1];
  double* probs = newA(double,K);
  for(long k=0;k<K;k++) probs[k] = exp(-t)*pow(t,k)/fact[k];

  unordered_map<uintE,double> p;
  for(long i=0;i<N;i++) {
    double randDouble = (double) hashInt(seed++) / UINT_E_MAX;
    long j = 0;
    double mass = 0;
    uintE x = start;
    do {
      mass += probs[j];
      if(randDouble < mass) break;
      x = walk(x,GA.V,seed++);
      j++;
    } while(j < K); // probs has K entries: index K would be out of bounds
    p[x]++;
  }
  // normalise hit counts into frequencies
  for(auto it=p.begin();it!=p.end();it++) {
    it->second /= N;
  }

  free(probs); free(fact);
  t1.stop();
  pairIF* A = newA(pairIF,p.size());

  // flatten the map into an array for sweepCut
  long numNonzerosQ = 0;
  for(auto it = p.begin(); it != p.end(); it++) {
    A[numNonzerosQ++] = make_pair(it->first,it->second);
  }
  sweepObject sweep = sweepCut(GA,A,numNonzerosQ,start);
  free(A);
  cout << "number of vertices touched = " << p.size() << endl;
  cout << "number of edges touched = " << sweep.vol << endl;
  cout << "conductance = " << sweep.conductance << " |S| = " << sweep.sizeS << " vol(S) = " << sweep.volS << " edgesCrossing = " << sweep.edgesCrossing << endl; 
  t1.reportTotal("computation time");
}
/**
 * Read a one-byte reply from the device and store it in m_reply.
 *
 * Resets m_timer_status and (re)starts m_io_timer before the read.
 *
 * @return the value returned by m_shrimp.Readv
 */
int shrimp_gateway_impl::read_reply() {

    m_timer_status = 0;
    m_io_timer.start();
    // Stack buffer instead of a heap allocation that was never released;
    // zeroed so m_reply is well defined even if nothing was read.
    char r[10] = {0};
    int rd = m_shrimp.Readv(r, 1, &m_timer_status);
    //printf("r: %i\n", r[0]);
    m_reply = r[0];
    //printf("byte read %i\n", rd);
    return rd;
}
/**
 * Write a command's raw bytes to the device.
 *
 * Resets m_timer_status and (re)starts m_io_timer before the write.
 *
 * @param command command whose buffer (to_buf / get_size) is sent
 * @return the value returned by m_shrimp.Writev
 */
int shrimp_gateway_impl::send_command(const shrimp_command_t& command) {

    m_timer_status = 0;
    m_io_timer.start();

    // The former 3-byte scratch copy was never used, was never freed, and
    // could be overrun by commands longer than 3 bytes; the bytes are sent
    // straight from the command's own buffer.
    int wd = m_shrimp.Writev(command.to_buf(), command.get_size(), &m_timer_status);
    //printf("bytes sent %i\n", wd);
    return wd;
}
Example #9
0
/**
 * Entry point of the parser: reads a list of input file names from the file
 * given by --file_list, parses every listed file (in parallel), prints the
 * collected counters and saves the id maps plus a MatrixMarket header file
 * ("mm.info").
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to CE_Graph_init
 * @return 0 on completion
 */
int main(int argc,  const char *argv[]) {

  logstream(LOG_WARNING)<<"CE_Graph parsers library is written by Danny Bickson (c). Send any "
    " comments or bug reports to [email protected] " << std::endl;
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);

  CE_Graph_init(argc, argv);

  debug = get_option_int("debug", 0);
  dir = get_option_string("file_list");
  lines = get_option_int("lines", 0);
  omp_set_num_threads(get_option_int("ncpus", 1));
  mytime.start();

  FILE * f = fopen(dir.c_str(), "r");
  if (f == NULL)
    logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;

  while(true){
    char buf[256];
    // Width-limited conversion: one name can never overrun buf.
    int rc = fscanf(f, "%255s\n", buf);
    if (rc < 1)
      break;
    in_files.push_back(buf);
  }
  fclose(f); // the list file is not needed once the names are collected

  if (in_files.size() == 0)
    logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl;

#pragma omp parallel for
  for (uint i=0; i< in_files.size(); i++)
    parse(i);

  std::cout << "Finished in " << mytime.current_time() << std::endl << "\t direct tweets found: " << links_found  <<
    " \t global tweets: " << wide_tweets << 
    "\t http links: " << http_links << 
    "\t retweets: " << retweet_found <<
    "\t total lines in input file : " << total_lines << 
    " \t invalid records (missing names) " << missing_names <<  std::endl;

  save_map_to_text_file(string2nodeid, outdir + "map.text");
  save_map_to_text_file(nodeid2hash, outdir + "reverse.map.text");
  save_map_to_text_file(tweets_per_user, outdir + "tweets_per_user.text");

  out_file fout("mm.info");
  fprintf(fout.outf, "%%%%MatrixMarket matrix coordinate real general\n");
  fprintf(fout.outf, "%u %u %lu\n", maxfrom+1, maxto+1, links_found);
  return 0;
}
Example #10
0
/**
 * Entry point of the parser: reads a list of input file names from the file
 * given by --file_list, parses every listed file (in parallel) and saves the
 * resulting string-to-id map.
 *
 * Requires --from_val to be set (a value of -1 is treated as fatal).
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init
 * @return 0 on completion
 */
int main(int argc,  const char *argv[]) {

	logstream(LOG_WARNING)<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
		" comments or bug reports to [email protected] " << std::endl;
	global_logger().set_log_level(LOG_INFO);
	global_logger().set_log_to_console(true);

	graphchi_init(argc, argv);

	debug = get_option_int("debug", 0);
	dir = get_option_string("file_list");
	lines = get_option_int("lines", 0);
	omp_set_num_threads(get_option_int("ncpus", 1));
	from_val = get_option_int("from_val", from_val);
	to_val = get_option_int("to_val", to_val);
	mid_val = get_option_int("mid_val", mid_val);
	if (from_val == -1)
		logstream(LOG_FATAL)<<"Must set from/to " << std::endl;
	mytime.start();

	FILE * f = fopen(dir.c_str(), "r");
	if (f == NULL)
		logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;

	while(true){
		char buf[256];
		// Width-limited conversion: one name can never overrun buf.
		int rc = fscanf(f, "%255s\n", buf);
		if (rc < 1)
			break;
		in_files.push_back(buf);
	}
	fclose(f); // the list file is not needed once the names are collected

	if (in_files.size() == 0)
		logstream(LOG_FATAL)<<"Failed to read any file frommap from the list file: " << dir << std::endl;

#pragma omp parallel for
	for (int i=0; i< (int)in_files.size(); i++)
		parse(i);

	std::cout << "Finished in " << mytime.current_time() << std::endl;

	save_map_to_text_file(frommap.string2nodeid, outdir + dir + "map.text");
	return 0;
}
Example #11
0
/**
 * Entry point of the parser: reads a list of input file names from the file
 * given by --file_list, parses every listed file sequentially and prints the
 * line count and the largest from/to ids seen.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init
 * @return 0 on completion
 */
int main(int argc,  const char *argv[]) {

  Rcpp::Rcout<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
    " comments or bug reports to [email protected] " << std::endl;
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);

  graphchi_init(argc, argv);

  debug = get_option_int("debug", 0);
  dir = get_option_string("file_list");
  lines = get_option_int("lines", 0);
  omp_set_num_threads(get_option_int("ncpus", 1));
  mytime.start();

  FILE * f = fopen(dir.c_str(), "r");
  if (f == NULL)
    logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;

  while(true){
    char buf[256];
    // Width-limited conversion: one name can never overrun buf.
    int rc = fscanf(f, "%255s\n", buf);
    if (rc < 1)
      break;
    in_files.push_back(buf);
  }
  fclose(f); // the list file is not needed once the names are collected

  if (in_files.size() == 0)
    logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl;

//#pragma omp parallel for
  for (uint i=0; i< in_files.size(); i++)
    parse(i);

  std::cout << "Finished in " << mytime.current_time() << std::endl << 
    "\t total lines in input file : " << total_lines <<  "\t max from: " << maxfrom << "\t max to: " <<maxto << std::endl;

  return 0;
}
Example #12
0
void Compute(graph<vertex>& GA, commandLine P) {
  t5.start();
  long length = P.getOptionLongValue("-r",0); //number of words per vertex
  char* oFile = P.getOptionValue("-out"); //file to write eccentricites
  srand (time(NULL));
  uintT seed = rand();
  cout << "seed = " << seed << endl;
  t0.start();
  long n = GA.n;
  
  uintE* ecc = newA(uintE,n);
  uintE* ecc2 = newA(uintE,n);
  {parallel_for(long i=0;i<n;i++) {
      ecc[i] = ecc2[i] = 0;
    }}

  t0.stop();

  //BEGIN COMPUTE CONNECTED COMPONENTS
  t1.start();
  intE* Labels = newA(intE,n);
  {parallel_for(long i=0;i<n;i++) {
    if(GA.V[i].getOutDegree() == 0) Labels[i] = -i-1; //singletons
    else Labels[i] = INT_E_MAX;
    }}

  //get max degree vertex
  uintE maxV = sequence::reduce<uintE>((intE)0,(intE)n,maxF<intE>(),getDegree<vertex>(GA.V));

  //visit large component with BFS
  CCBFS(maxV,GA,Labels);
  //visit small components with label propagation
  Components(GA, Labels);

  //sort by component ID
  intPair* CCpairs = newA(intPair,n);
  {parallel_for(long i=0;i<n;i++)
    if(Labels[i] < 0)
      CCpairs[i] = make_pair(-Labels[i]-1,i);
    else CCpairs[i] = make_pair(Labels[i],i);
  }
  free(Labels);

  intSort::iSort(CCpairs, n, n+1,firstF<uintE,uintE>());

  uintE* changes = newA(uintE,n);
  changes[0] = 0;
  {parallel_for(long i=1;i<n;i++) 
      changes[i] = (CCpairs[i].first != CCpairs[i-1].first) ? i : UINT_E_MAX;}

  uintE* CCoffsets = newA(uintE,n);
  uintE numCC = sequence::filter(changes, CCoffsets, n, nonMaxF());
  CCoffsets[numCC] = n;
  free(changes);
  t1.stop();
  //END COMPUTE CONNECTED COMPONENTS

  //init data structures
  t0.start();
  length = max((long)1,min((n+63)/64,(long)length));
  long* VisitedArray = newA(long,n*length);
  long* NextVisitedArray = newA(long,n*length); 
  int* flags = newA(int,n);
  {parallel_for(long i=0;i<n;i++) flags[i] = -1;}
  uintE* starts = newA(uintE,n);
  intPair* pairs = newA(intPair,n);
  t0.stop();

  //BEGIN COMPUTE ECCENTRICITES PER COMPONENT
  for(long k = 0; k < numCC; k++) {
    t2.start();
    uintE o = CCoffsets[k];
    uintE CCsize = CCoffsets[k+1] - o;
    if(CCsize == 2) { //size 2 CC's have ecc of 1
      ecc[CCpairs[o].second] = ecc[CCpairs[o+1].second] = 1;
      t2.stop();
    } else if(CCsize > 1) { //size 1 CC's already have ecc of 0
      //do main computation
      long myLength = min((long)length,((long)CCsize+63)/64);

      //initialize bit vectors for component vertices
      {parallel_for(long i=0;i<CCsize;i++) {
	uintT v = CCpairs[o+i].second;
	parallel_for(long j=0;j<myLength;j++)
	  VisitedArray[v*myLength+j] = NextVisitedArray[v*myLength+j] = 0;
	}}

      long sampleSize = min((long)CCsize,(long)64*myLength);

      uintE* starts2 = newA(uintE,sampleSize);

      //pick random vertices (could have duplicates)
      {parallel_for(ulong i=0;i<sampleSize;i++) {
	uintT index = hashInt(i+seed) % CCsize;
	if(flags[index] == -1 && CAS(&flags[index],-1,(int)i)) {
	  starts[i] = CCpairs[o+index].second;
	  NextVisitedArray[CCpairs[o+index].second*myLength + i/64] = (long) 1<<(i%64);
	} else starts[i] = UINT_E_MAX;
	}}

      //remove duplicates
      uintE numUnique = sequence::filter(starts,starts2,sampleSize,nonMaxF());

      //reset flags
      parallel_for(ulong i=0;i<sampleSize;i++) {
	uintT index = hashInt(i+seed) % CCsize;
	if(flags[index] == i) flags[index] = -1;
      }

      //first phase
      vertexSubset Frontier(n,numUnique,starts2); //initial frontier
      //note: starts2 will be freed inside the following loop
      uintE round = 0;
      while(!Frontier.isEmpty()){
	round++;
	vertexMap(Frontier, Ecc_Vertex_F(myLength,VisitedArray,NextVisitedArray));
	vertexSubset output = 
	  edgeMap(GA, Frontier, 
		  Ecc_F(myLength,VisitedArray,NextVisitedArray,ecc,round),
		  GA.m/20);
	Frontier.del();
	Frontier = output;
      }
      Frontier.del();
      t2.stop();
      //second phase if size of CC > 64
      if(CCsize > 1024) {
	//sort by ecc
	t3.start();
	{parallel_for(long i=0;i<CCsize;i++) {
	  pairs[i] = make_pair(ecc[CCpairs[o+i].second],CCpairs[o+i].second);
	  }}
	intPair maxR = sequence::reduce(pairs,CCsize,maxFirstF());
	intSort::iSort(pairs, CCsize, 1+maxR.first, firstF<uintE,uintE>());
	t3.stop();

	t4.start();

	//reset bit vectors for component vertices
	{parallel_for(long i=0;i<CCsize;i++) {
	  uintT v = CCpairs[o+i].second;
	  parallel_for(long j=0;j<myLength;j++)
	    VisitedArray[v*myLength+j] = NextVisitedArray[v*myLength+j] = 0;
	  }}

	starts2 = newA(uintE,sampleSize);
	//pick starting points with highest ecc ("fringe" vertices)
	{parallel_for(long i=0;i<sampleSize;i++) {
	  intE v = pairs[CCsize-i-1].second;
	  starts2[i] = v;
	  NextVisitedArray[v*myLength + i/64] = (long) 1<<(i%64);
	  }}

	vertexSubset Frontier2(n,sampleSize,starts2); //initial frontier
	//note: starts2 will be freed inside the following loop
	round = 0;
	while(!Frontier2.isEmpty()){
	  round++;
	  vertexMap(Frontier2, Ecc_Vertex_F(myLength,VisitedArray,NextVisitedArray));
	  vertexSubset output = 
	    edgeMap(GA, Frontier2,Ecc_F(myLength,VisitedArray,NextVisitedArray,ecc2,round), GA.m/20);
	  Frontier2.del();
	  Frontier2 = output;
	}
	Frontier2.del();
	{parallel_for(long i=0;i<n;i++) ecc[i] = max(ecc[i],ecc2[i]);}
	t4.stop();
      }
    }
Example #13
0
/**
 * Entry point of the mutual-information tool: parses every file named in the
 * --file_list file, then combines the counts in p_x, p_y and
 * frommap.string2nodeid (keys of the form "<x>_<y>") into the mutual
 * information between the columns selected by --from_val and --to_val.
 *
 * The result is printed only when mi / h exceeds 1e-3, where h is the sum of
 * -(c/n) * log2(c/n) over the p_x counts; otherwise "-" is printed.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, forwarded to graphchi_init
 * @return 0 on completion
 */
int main(int argc,  const char *argv[]) {

	Rcpp::Rcout<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
		" comments or bug reports to [email protected] " << std::endl;
	global_logger().set_log_level(LOG_INFO);
	global_logger().set_log_to_console(true);

	graphchi_init(argc, argv);

	debug = get_option_int("debug", 0);
	dir = get_option_string("file_list");
	lines = get_option_int("lines", 0);
	omp_set_num_threads(get_option_int("ncpus", 1));
	from_val = get_option_int("from_val", from_val);
	to_val = get_option_int("to_val", to_val);
	if (from_val == -1)
		logstream(LOG_FATAL)<<"Must set from/to " << std::endl;
	mytime.start();

	FILE * f = fopen(dir.c_str(), "r");
	if (f == NULL)
		logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;

	while(true){
		char buf[256];
		// Width-limited conversion: one name can never overrun buf.
		int rc = fscanf(f, "%255s\n", buf);
		if (rc < 1)
			break;
		in_files.push_back(buf);
	}
	fclose(f); // the list file is not needed once the names are collected

	if (in_files.size() == 0)
		logstream(LOG_FATAL)<<"Failed to read any file frommap from the list file: " << dir << std::endl;

#pragma omp parallel for
	for (int i=0; i< (int)in_files.size(); i++)
		parse(i);

	std::cout << "Finished in " << mytime.current_time() << std::endl;

	// Marginal totals; h accumulates -(c/n)*log2(c/n) over the p_x counts.
	int total_x =0 , total_y = 0;
	std::map<std::string, int>::iterator it;
	double h = 0;
	for (it = p_x.begin(); it != p_x.end(); it++){
		total_x+= it->second;
		h-= (it->second / (double)n)*log2(it->second / (double)n);
	}
	for (it = p_y.begin(); it != p_y.end(); it++)
		total_y+= it->second;
	assert(total_x == n);
	assert(total_y == n);


	double mi = 0;
	std::map<std::string, uint>::iterator iter;
	assert(n != 0);

	int total_p_xy = 0;
	for (iter = frommap.string2nodeid.begin() ; iter != frommap.string2nodeid.end(); iter++){
		double p_xy = iter->second / (double)n;
		assert(p_xy > 0);
		char buf[256];
		// strncpy does not terminate when the key fills the buffer, and
		// strtok needs a terminated string.
		strncpy(buf, iter->first.c_str(), sizeof(buf) - 1);
		buf[sizeof(buf) - 1] = '\0';
		// key layout: "<x>_<y>"
		char * first = strtok(buf, "_");
		char * second = strtok(NULL, "\n\r ");
		assert(first && second);
		double px = p_x[first] / (double)n;
		double py = p_y[second] / (double)n;
		assert(px > 0 && py > 0);
		mi += p_xy * log2(p_xy / (px * py));
		total_p_xy += iter->second;
	}
	assert(total_p_xy == n);
	logstream(LOG_INFO)<<"Total examples: " <<n << std::endl;

	logstream(LOG_INFO)<<"Unique p(x) " << p_x.size() << std::endl;
	logstream(LOG_INFO)<<"Unique p(y) " << p_y.size() << std::endl;
	logstream(LOG_INFO)<<"Average F(x) " << total_x / (double)p_x.size() << std::endl;
	logstream(LOG_INFO)<<"Average F(y) " << total_y / (double)p_y.size() << std::endl;

	std::cout<<"Mutual information of " << from_val << " [" << header_titles[from_val-1] << "] <-> " << to_val << " [" << header_titles[to_val-1] << "] is: " ;
	if (mi/h > 1e-3) 
		std::cout<<std::setprecision(3) << mi << std::endl;
	else std::cout<<"-"<<std::endl;
	save_map_to_text_file(frommap.string2nodeid, outdir + dir + "map.text");
	logstream(LOG_INFO)<<"Saving map file " << outdir << dir << "map.text" << std::endl;
	return 0;
}
Example #14
0
int main(int argc, const char ** argv) {

  mytimer.start();
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("rating2");

  knn_sample_percent = get_option_float("knn_sample_percent", 1.0);
  if (knn_sample_percent <= 0 || knn_sample_percent > 1)
    logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl;

  num_ratings   = get_option_int("num_ratings", 10);
  if (num_ratings <= 0)
    logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl;

  debug         = get_option_int("debug", 0);
  tokens_per_row = get_option_int("tokens_per_row", tokens_per_row);
  std::string algorithm     = get_option_string("algorithm");
  /* Basic arguments for RBM algorithm */
  rbm_bins      = get_option_int("rbm_bins", rbm_bins);
  rbm_scaling   = get_option_float("rbm_scaling", rbm_scaling);

  if (algorithm == "svdpp" || algorithm == "svd++")
    algo = SVDPP;
  else if (algorithm == "biassgd")
    algo = BIASSGD;
  else if (algorithm == "rbm")
    algo = RBM;
  else logstream(LOG_FATAL)<<"--algorithm should be svd++ or biassgd or rbm"<<std::endl;

  parse_command_line_args();

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = 0;
  if (tokens_per_row == 3)
    nshards = convert_matrixmarket<edge_data>(training, 0, 0, 3, TRAINING, false);
  else if (tokens_per_row == 4)
    nshards = convert_matrixmarket4<edge_data4>(training);
  else logstream(LOG_FATAL)<<"--tokens_per_row should be either 3 or 4" << std::endl;

  assert(M > 0 && N > 0);
  latent_factors_inmem.resize(M+N); // Initialize in-memory vertices.

  //initialize data structure to hold the matrix read from file
  if (algo == RBM){
#pragma omp parallel for
    for (uint i=0; i< M+N; i++){
      if (i < M){
        latent_factors_inmem[i].pvec = zeros(D*3);
      }
      else {  
        latent_factors_inmem[i].pvec = zeros(rbm_bins + rbm_bins * D);
      }
    } 
  }
 
  read_factors(training);
  if ((uint)num_ratings > N){
    logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl;
    num_ratings = N;
  }
  srand(time(NULL));

  /* Run */
  if (tokens_per_row == 3){
    RatingVerticesInMemProgram<VertexDataType, EdgeDataType> program;
    graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m); 
    set_engine_flags(engine);
    engine.run(program, 1);
  } 
  else if (tokens_per_row == 4){
    RatingVerticesInMemProgram<VertexDataType, edge_data4> program;
    graphchi_engine<VertexDataType, edge_data4> engine(training, nshards, false, m); 
    set_engine_flags(engine);
    engine.run(program, 1);
  }
  /* Output latent factor matrices in matrix-market format */
  output_knn_result(training);

  rating_stats();

  if (users_without_ratings > 0)
    logstream(LOG_WARNING)<<"Found " << users_without_ratings << " without ratings. For those users no items are recommended (item id 0)" << std::endl;

  if (users_no_ratings > 0)
    logstream(LOG_WARNING)<<"Failed to compute ratings for " << users_no_ratings << " Users. For those users no items are recommended (item id 0)" << std::endl;


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  return 0;
}
Example #15
0
void Compute(graph<vertex>& GA, commandLine P) {
  t10.start();
  char* oFile = P.getOptionValue("-out"); //file to write eccentricites
  srand (time(NULL));
  uintT seed = rand();
  cout << "seed = " << seed << endl;
  t0.start();
  long n = GA.n;
  uintE* ecc = newA(uintE,n);
  {parallel_for(long i=0;i<n;i++) ecc[i] = UINT_E_MAX;}
  t0.stop();

  //BEGIN COMPUTE CONNECTED COMPONENTS
  t1.start();
  intE* Labels = newA(intE,n);
  {parallel_for(long i=0;i<n;i++) {
    if(GA.V[i].getOutDegree() == 0) Labels[i] = -i-1; //singletons
    else Labels[i] = INT_E_MAX;
    }}
  //get max degree vertex
  uintE maxV = sequence::reduce<uintE>((intE)0,(intE)n,maxF<intE>(),getDegree<vertex>(GA.V));
  //visit large component with BFS
  CCBFS(maxV,GA,Labels);
  //visit small components with label propagation
  Components(GA, Labels);
  //sort by component ID
  intPair* CCpairs = newA(intPair,n);
  {parallel_for(long i=0;i<n;i++)
    if(Labels[i] < 0)
      CCpairs[i] = make_pair(-Labels[i]-1,i);
    else CCpairs[i] = make_pair(Labels[i],i);
  }
  free(Labels);

  intSort::iSort(CCpairs, n, n+1, firstF<uintE,uintE>());

  uintE* changes = newA(uintE,n);
  changes[0] = 0;
  {parallel_for(long i=1;i<n;i++) 
      changes[i] = (CCpairs[i].first != CCpairs[i-1].first) ? i : UINT_E_MAX;}

  uintE* CCoffsets = newA(uintE,n);
  uintE numCC = sequence::filter(changes, CCoffsets, n, nonMaxF());
  CCoffsets[numCC] = n;
  free(changes);
  t1.stop();
  //END COMPUTE CONNECTED COMPONENTS

  uintE maxS = min((uintE)n,(uintE)sqrt(n*log2(n)));
  uintE maxSampleSize = max((uintE)10,max((uintE)((n/maxS)*log2(n)),maxS));
  //data structures to be shared by all components
  uintE** Dists = newA(uintE*,maxSampleSize);
  uintE* Dist = newA(uintE,maxSampleSize*n);
  {parallel_for(long i=0;i<maxSampleSize;i++) Dists[i] = Dist+i*n;}
  {parallel_for(long i=0;i<n*maxSampleSize;i++) Dist[i] = UINT_E_MAX;}
  intPair* wDist = newA(intPair,n); 
  {parallel_for(long i=0;i<n;i++)
      wDist[i] = make_pair(UINT_E_MAX,UINT_E_MAX);}
  intPair* minDists = newA(intPair,n);
  uintE* starts = newA(uintE,n);
  uintE* starts2 = newA(uintE,n);
  uintE* maxDists = newA(uintE,n);

  //BEGIN COMPUTE ECCENTRICITES PER COMPONENT
  t4.start();
  for(long k = 0; k < numCC; k++) {
    uintE o = CCoffsets[k];
    uintE CCsize = CCoffsets[k+1] - o;
    if(CCsize == 1) ecc[CCpairs[o].second] = 0; //singletons have ecc of 0
    if(CCsize == 2) { //size 2 CC's have ecc of 1
      ecc[CCpairs[o].second] = ecc[CCpairs[o+1].second] = 1;
    } else if(CCsize > 1) {
      //do main computation
      t2.start();
      uintE s = min(CCsize,(uintE)sqrt(CCsize*log2(CCsize)));
      //pick sample of about \sqrt{n\log n} vertices
      long sampleSize = min(CCsize,max((uintE)10,(uintE)((CCsize/s)*log2(CCsize))));
      //pick random vertices
      {parallel_for(ulong i=0;i<CCsize;i++) {
	  //pick with probability sampleSize/CCsize
	  uintT index = hash(i+seed) % CCsize; 
	if(index < sampleSize) starts[i] = CCpairs[o+i].second;
	else starts[i] = UINT_E_MAX;
       	}}
      //pack down
      uintE numUnique = sequence::filter(starts,starts2,CCsize,nonMaxF());
      //sample cannot be empty!
      if(numUnique == 0) { starts2[0] = CCpairs[o+(hash(seed)%CCsize)].second; numUnique++; }
      if(numUnique > maxSampleSize) numUnique = maxSampleSize; //cap at maxSampleSize
      t2.stop();
      t3.start();
      //execute BFS per sample
      {for(long i=0;i<numUnique;i++) {
	uintE v = starts2[i];
	Dists[i][v] = 0; //set source dist to 0
	vertexSubset Frontier(n,v);
	uintE round = 0;
	while(!Frontier.isEmpty()){
	  round++;
	  vertexSubset output = 
	    edgeMap(GA, Frontier, BFS_F(Dists[i],round),GA.m/20);
	  Frontier.del();
	  Frontier = output;
	}
	Frontier.del();
	ecc[v] = round-1; //set radius for sample vertex
	}}
      t3.stop();
      t4.start();
      //store max distance from sample for each vertex so that we can
      //reuse Distance arrays
      {parallel_for(long i=0;i<CCsize;i++) {
	uintE v = CCpairs[o+i].second;
	//if not one of the vertices we did BFS on
	if(ecc[v] == UINT_E_MAX) {
	  uintE max_from_sample = 0;
	  //compute max distance from sampled vertex
	  for(long j=0;j<numUnique;j++) {
	    uintE d = Dists[j][v];
	    if(d > max_from_sample) max_from_sample = d;
	  }
	  maxDists[i] = max_from_sample;
	}}}
      t4.stop();
      t5.start();
      //find furthest vertex from sample set S
      {parallel_for(long j=0;j<CCsize;j++) {
	uintE v = CCpairs[o+j].second;
	uintE m = UINT_E_MAX;
	for(long i=0;i<numUnique;i++) {
	  uintE d = Dists[i][v];
	  if(d < m) m = d;
	  if(d == 0) break;
	}
	minDists[j] = make_pair(m,v);
	}}
      
      intPair furthest = 
	sequence::reduce<intPair>(minDists,(intE)CCsize,maxFirstF());
      uintE w = furthest.second;
      t5.stop();
      t3.start();
      //reset Dist array entries
      {parallel_for(long i=0;i<numUnique;i++) {
	  parallel_for(long j=0;j<CCsize;j++) {
	    uintE v = CCpairs[o+j].second;
	    Dists[i][v] = UINT_E_MAX;
	  }
	}}
      t3.stop();
      t6.start();
      //execute BFS from w and find \sqrt{n log n} neighborhood of w
      uintE nghSize = min(CCsize,max((uintE)10,s));
      uintE* Ngh_s = starts; //reuse starts array
      bool filled_Ngh = 0;
      //stores distance from w and index of closest vertex in Ngh_s on
      //path from w to v
      wDist[w] = make_pair(0,0); //set source dist to 0
      vertexSubset Frontier(n,w);
      uintE round = 0;
      uintE numVisited = 0;
      while(!Frontier.isEmpty()){
	round++;
	if(!filled_Ngh) { 
	  Frontier.toSparse();
	  //Note: if frontier size < nghSize - visited, there is non-determinism in which vertices 
	  //get added to Ngh_s as the ordering of vertices on the frontier is non-deterministic
	  {parallel_for(long i=0;i<min(nghSize-numVisited,(uintE)Frontier.numNonzeros());i++) {
	    Ngh_s[numVisited+i] = Frontier.s[i];
	    wDist[Frontier.s[i]].second = numVisited+i;
	  }
	  numVisited += Frontier.numNonzeros();
	  if(numVisited >= nghSize) filled_Ngh = 1;
	  }}
	vertexSubset output = 
	  edgeMap(GA, Frontier, BFS_Pair_F(wDist,round),GA.m/20);
	Frontier.del();
	Frontier = output;
      }
      Frontier.del();
      ecc[w] = round-1; //set radius for w
      t6.stop();
      t7.start();
      //execute BFS from each vertex in neighborhood of w
      uintE** Dists2 = Dists; //reuse distance array
      uintE* Dist2 = Dist;
    
      {for(long i=0;i<nghSize;i++) {
	uintE v = Ngh_s[i];
	Dists2[i][v] = 0; //set source dist to 0
	vertexSubset Frontier(n,v);
	uintE round = 0;
	while(!Frontier.isEmpty()){
	  round++;
	  vertexSubset output = 
	    edgeMap(GA, Frontier, BFS_F(Dists2[i],round),GA.m/20);
	  Frontier.del();
	  Frontier = output;
	}
	Frontier.del();
	ecc[v] = round-1; //set radius of vertex in Ngh_s
	}}
      t7.stop();
      t8.start();
      //min radius of sample
      parallel_for(long i=0;i<numUnique;i++) starts2[i] = ecc[starts2[i]];
      uintE min_r_sample = 
	sequence::reduce<uintE>(starts2,numUnique,minF<uintE>());
      //compute ecc values
      {parallel_for(long i=0;i<CCsize;i++) {
	uintE v = CCpairs[o+i].second;
	//if not one of the vertices we did BFS on
	if(ecc[v] == UINT_E_MAX) {
	  uintE d_vw = wDist[v].first;
	  uintE rv = max(maxDists[i],d_vw);
	  //index in Ngh_s of closest vertex in Ngh_s on path from w to v
	  uintE index_vt = wDist[v].second;
	  uintE vt = Ngh_s[index_vt];
	  uintE d_vt_v = Dists2[index_vt][v];
	  uintE d_vt_w = Dists2[index_vt][w];
	  if(d_vt_v <= d_vt_w) ecc[v] = max(rv,ecc[vt]);
	  else ecc[v] = max(rv,min_r_sample);
	}
	}}
      t8.stop();
      t7.start();
      //reset Dist array entries
      {parallel_for(long i=0;i<nghSize;i++) {
	  parallel_for(long j=0;j<CCsize;j++) {
	    uintE v = CCpairs[o+j].second;
	    Dists2[i][v] = UINT_E_MAX;
	  }
	}}
      t7.stop();
      t6.start();
      //reset wDist array entries
      {parallel_for(long i=0;i<CCsize;i++) {
	  uintE v = CCpairs[o+i].second;
	  wDist[v] = make_pair(UINT_E_MAX,UINT_E_MAX);
	}}
      t6.stop();
    }
Example #16
0
int main(int argc, const char ** argv) {

  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  distance_metric          = get_option_int("distance", JACCARD);
  asym_cosine_alpha        = get_option_float("asym_cosine_alpha", 0.5);
  debug                    = get_option_int("debug", debug);
  if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE && distance_metric != PROB)
    logstream(LOG_FATAL)<<"Wrong distance metric. --distance_metric=XX, where XX should be either 0= JACCARD, 1= AA, 2= RA, 3= ASYM_COSINE, 4 = PROB" << std::endl;  
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training, 0, 0, 3, TRAINING, false);
  if (nshards != 1)
    logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl;
  K                        = get_option_int("K", K);
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;

 logstream(LOG_INFO) << "M = " << M << std::endl;
  assert(M > 0 && N > 0);
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();
  //array for marking which items are conected to the pivot items via users.
  relevant_items = new bool[N];

  //store node degrees in an array to be used for AA distance metric
  if (distance_metric == AA || distance_metric == RA || distance_metric == PROB)
    latent_factors_inmem.resize(M);
  if (distance_metric == PROB)
    prob_sim_normalization_constant = (double)L / (double)(M*N-L);


  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, 1, true, m); 
  set_engine_flags(engine);
  engine.set_maxwindow(M+N+1);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    char buf[256];
    sprintf(buf, "%s.out%d", training.c_str(), i);
    out_files[i] = open_file(buf, "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl;
  if (not_enough)
    logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl;
 
  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  delete[] relevant_items;

  /* write the matrix market info header to be used later */
  FILE * pmm = fopen((training + "-topk:info").c_str(), "w");
  if (pmm == NULL)
    logstream(LOG_FATAL)<<"Failed to open " << training << ":info to file" << std::endl;
  fprintf(pmm, "%%%%MatrixMarket matrix coordinate real general\n");
  fprintf(pmm, "%u %u %u\n", N, N, (unsigned int)sum(written_pairs));
  fclose(pmm);

  /* sort output files */
  logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl;
  std::string dname= dirname(strdup(argv[0]));
  system(("bash " + dname + "/topk.sh " + std::string(basename(strdup(training.c_str())))).c_str()); 

  return 0;
}
Example #17
0
/**
 * Entry point of the "itemsim2rating2" tool.
 *
 * Reads the command line options, converts the training matrix together with
 * an item similarity file, runs ItemDistanceProgram while every worker thread
 * writes to its own "<training>-rec.out<i>" file, and finally runs topk.sh
 * (looked up next to the executable) on the "<training>-rec" base name.
 *
 * NOTE(review): the code relies on logstream(LOG_FATAL) stopping the program
 * -- confirm against the logger.
 *
 * @return 0 once all steps have run.
 */
int main(int argc, const char ** argv) {
  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("itemsim2rating2");    

  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  debug                    = get_option_int("debug", 0);
  parse_command_line_args();
  std::string similarity   = get_option_string("similarity", "");
  if (similarity == "")
    logstream(LOG_FATAL)<<"Missing similarity input file. Please specify one using the --similarity=filename command line flag" << std::endl;
  undirected               = get_option_int("undirected", 0);
  
  mytimer.start();

  int nshards          = convert_matrixmarket_and_item_similarity<edge_data>(training, similarity, 3, &degrees);

  assert(M > 0 && N > 0);
  // do the arithmetic in double: M*N in integer arithmetic can overflow
  prob_sim_normalization_constant = (double)L / ((double)M * (double)N - (double)L);
  
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();

  //array for marking which items are connected to the pivot items via users.
  relevant_items = new bool[N];

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, edge_data> engine(training, nshards, true, m); 
  set_engine_flags(engine);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    // only the short numeric suffix goes through a fixed buffer, so a long
    // training path can neither overflow it nor be truncated
    char suffix[32];
    snprintf(suffix, sizeof(suffix), "-rec.out%u", i);
    out_files[i] = open_file((training + suffix).c_str(), "w");
  }


  K 			   = get_option_int("K");
  // explicit check (assert alone disappears when NDEBUG is defined)
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;
  assert(K > 0);
  //run the program
  engine.run(program, niters);

  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);
  
  delete[] relevant_items;


  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);

  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << std::endl;

  logstream(LOG_INFO)<<"Going to sort and merge output files " << std::endl;
  // dirname()/basename() may modify their argument, so hand them private
  // std::string copies instead of strdup() results that were never freed
  std::string exe_path(argv[0]);
  std::string rec_path(training + "-rec");
  const std::string dname = dirname(&exe_path[0]);
  const std::string bname = basename(&rec_path[0]);
  // NOTE(review): the paths are passed to the shell unquoted -- confirm this is acceptable
  if (system(("bash " + dname + "/topk.sh " + bname).c_str()) != 0)
    logstream(LOG_WARNING)<<"Sorting script " << dname << "/topk.sh returned a non-zero status" << std::endl;


  return 0;
}
Example #18
0
/**
 * Entry point of the "item-cf" tool (variant without the final sort step).
 *
 * Reads the command line options, converts the training matrix (exactly one
 * shard is accepted), runs ItemDistanceProgram while every worker thread
 * writes to its own "<training>.out<i>" file, then reports the counters.
 *
 * NOTE(review): the code relies on logstream(LOG_FATAL) stopping the program
 * -- confirm against the logger.
 *
 * @return 0 once all steps have run.
 */
int main(int argc, const char ** argv) {

  print_copyright();

  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("item-cf");    
  /* Basic arguments for application */
  min_allowed_intersection = get_option_int("min_allowed_intersection", min_allowed_intersection);
  distance_metric          = get_option_int("distance", JACCARD);
  asym_cosine_alpha        = get_option_float("asym_cosine_alpha", 0.5);
  if (distance_metric != JACCARD && distance_metric != AA && distance_metric != RA && distance_metric != ASYM_COSINE)
    // the option is read under the name "distance", so report that name
    logstream(LOG_FATAL)<<"Wrong distance metric. --distance=XX, where XX should be either 0) JACCARD, 1) AA, 2) RA, 3) ASYM_COSINE" << std::endl;  
  parse_command_line_args();

  mytimer.start();
  int nshards          = convert_matrixmarket<EdgeDataType>(training/*, orderByDegreePreprocessor*/);
  if (nshards != 1)
    logstream(LOG_FATAL)<<"This application currently supports only 1 shard" << std::endl;
  K                        = get_option_int("K", K);
  if (K <= 0)
    logstream(LOG_FATAL)<<"Please specify the number of ratings to generate for each user using the --K command" << std::endl;

  assert(M > 0 && N > 0);
  //initialize data structure which saves a subset of the items (pivots) in memory
  adjcontainer = new adjlist_container();
  //array for marking which items are connected to the pivot items via users.
  relevant_items = new bool[N];

  //store node degrees in an array to be used for AA distance metric
  if (distance_metric == AA || distance_metric == RA)
    latent_factors_inmem.resize(M);

  /* Run */
  ItemDistanceProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, true, m); 
  set_engine_flags(engine);
  engine.set_maxwindow(M+N+1);

  //open output files as the number of operating threads
  out_files.resize(number_of_omp_threads());
  for (uint i=0; i< out_files.size(); i++){
    // only the short numeric suffix goes through a fixed buffer, so a long
    // training path can neither overflow it nor be truncated
    char suffix[32];
    snprintf(suffix, sizeof(suffix), ".out%u", i);
    out_files[i] = open_file((training + suffix).c_str(), "w");
  }

  //run the program
  engine.run(program, niters);

  /* Report execution metrics */
  if (!quiet)
    metrics_report(m);
  
  std::cout<<"Total item pairs compared: " << item_pairs_compared << " total written to file: " << sum(written_pairs) << " pairs with zero distance: " << zero_dist << std::endl;
  if (not_enough)
    logstream(LOG_WARNING)<<"Items that did not have enough similar items: " << not_enough << std::endl;
  // fclose() flushes pending output itself, no separate fflush() is needed
  for (uint i=0; i< out_files.size(); i++)
    fclose(out_files[i]);

  std::cout<<"Created "  << number_of_omp_threads() << " output files with the format: " << training << ".outXX, where XX is the output thread number" << std::endl; 

  delete[] relevant_items;
  return 0;
}
Example #19
0
// Builds a set cover for GS by walking the degree buckets from the highest
// index down to 0. For every bucket the still-live sets are repartitioned
// around the bucket's degree threshold, the part at or above the threshold is
// handed to processBucket, and the sets it marks (negative degree) are
// appended to the result by their original id. Progress and timing
// information is printed to cout.
// Returns the ids of the selected sets together with their count.
_seq<intT> setCover(Graph GS) {
    const double epsilon = 0.01;
    intT m = maxElt(GS);
    cout << "m = " << m << endl;

    // distribute the sets into buckets (timed on its own)
    bucketTime.start();
    pair<bucket*, int> bucketing = putInBuckets(GS, epsilon);
    bucketTime.stop();
    bucket* allBuckets = bucketing.first;
    const int numBuckets = bucketing.second;

    set* live = newA(set, GS.n);      // sets carried through the current round
    set* scratch = newA(set, GS.n);   // pack target (pack is not in place)
    int numLive = 0;                  // number of valid entries in live
    bool* keep = newA(bool, GS.n);
    intT* inCover = newA(intT, GS.n);
    intT nInCover = 0;
    intT totalWork = 0;
    intT* elts = newA(intT,m);
    intT threshold = GS.n;
    for (int e = 0; e < m; ++e) elts[e] = INT_MAX;

    // highest bucket index first
    for (int b = numBuckets-1; b >= 0; --b) {
        bucket current = allBuckets[b];

        intT degreeThreshold = ceil(pow(1.0+epsilon,b));
        // an empty bucket that does not move the threshold needs no round
        if (current.n == 0 && degreeThreshold == threshold) continue;
        threshold = degreeThreshold;
        packTime.start();

        // first the leftovers that fall below the threshold: kept for later rounds
        for (int s = 0; s < numLive; ++s) {
            const bool stillAlive = live[s].degree > 0;
            keep[s] = stillAlive && live[s].degree < threshold;
        }
        intT numBelow = sequence::pack(live, scratch, keep, numLive);

        // then the leftovers at or above the threshold: handled in this round
        for (int s = 0; s < numLive; ++s)
            keep[s] = (live[s].degree >= threshold);
        intT numNow = sequence::pack(live, scratch+numBelow, keep, numLive);

        // the pre-bucketed sets of this bucket go behind them, also for this round
        set* appendAt = scratch + numBelow + numNow;
        for (int s = 0; s < current.n; ++s)
            appendAt[s] = current.S[s];

        numNow += current.n;            // everything handled in this round
        numLive = numBelow + numNow;    // plus what is kept for later rounds
        swap(live, scratch);            // packed data becomes the live array
        set* roundSets = live + numBelow;   // start of this round's sets
        packTime.stop();

        if (numNow <= 0) continue;      // nothing to do in this round

        manisTime.start();
        intT work = processBucket(roundSets, elts, numNow, threshold);
        totalWork += work;
        manisTime.stop();
        packTime.start();

        // a negative degree marks a set that was selected for the cover
        for (int s = 0; s < numNow; ++s)
            keep[s] = roundSets[s].degree < 0;

        // collect their positions, then translate positions into original ids
        intT* fresh = inCover + nInCover;
        int nNew = sequence::packIndex(fresh, keep, numNow);
        for (int s = 0; s < nNew; ++s)
            fresh[s] = roundSets[fresh[s]].id;
        nInCover = nInCover + nNew;
        packTime.stop();
        cout << "i = " << b << " bc = " << current.n << " l = " << numLive << " lb = " << numNow
             << " work = " << work << " new = " << nNew << " threshold = " << threshold << endl;
    }
    cout << "Set cover size = " << nInCover << endl;
    cout << "Total work = " << totalWork << endl;
    cout << "Bucket Time = " << bucketTime.total() << endl;
    cout << "Manis Time = " << manisTime.total() << endl;
    cout << "Pack Time = " << packTime.total() << endl;

    // inCover is handed to the caller inside the returned sequence; the rest is released here
    free(elts);
    free(live);
    free(scratch);
    free(keep);
    freeBuckets(allBuckets);
    return _seq<intT>(inCover, nInCover);
}
Example #20
0
int main(int argc, const char ** argv) {

  mytimer.start();
  logstream(LOG_WARNING)<<"GraphChi Collaborative filtering library is written by Danny Bickson (c). Send any "
    " comments or bug reports to [email protected] " << std::endl;
  /* GraphChi initialization will read the command line 
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("nmf-inmemory-factors");

  /* Basic arguments for application. NOTE: File will be automatically 'sharded'. */
  training = get_option_string("training");    // Base filename
  validation = get_option_string("validation", "");
  test = get_option_string("test", "");
  knn_sample_percent = get_option_float("knn_sample_percent", 1.0);
  if (knn_sample_percent <= 0 || knn_sample_percent > 1)
    logstream(LOG_FATAL)<<"Sample percente should be in the range (0, 1] " << std::endl;

  if (validation == "")
    validation += training + "e";  
  if (test == "")
    test += training + "t";

  maxval        = get_option_float("maxval", 1e100);
  minval        = get_option_float("minval", -1e100);
  bool quiet    = get_option_int("quiet", 0);
  num_ratings   = get_option_int("num_ratings", 10);
  if (num_ratings <= 0)
    logstream(LOG_FATAL)<<"num_ratings, the number of recomended items for each user, should be >=1 " << std::endl;

  debug         = get_option_int("debug", 0);
  if (quiet)
    global_logger().set_log_level(LOG_ERROR);

  bool scheduler       = false;                        // Selective scheduling not supported for now.

  /* Preprocess data if needed, or discover preprocess files */
  int nshards = convert_matrixmarket<float>(training);
  assert(M > 0 && N > 0);
  latent_factors_inmem.resize(M+N); // Initialize in-memory vertices.
  max_left_vertex = M-1;
  max_right_vertex = M+N-1;
  read_factors<vertex_data>(training + "_U.mm", true);
  read_factors<vertex_data>(training + "_V.mm", false);
  if ((uint)num_ratings > N){
    logstream(LOG_WARNING)<<"num_ratings is too big - setting it to: " << N << std::endl;
    num_ratings = N;
  }
  srand(time(NULL));

  /* Run */
  RatingVerticesInMemProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, scheduler, m); 
  engine.set_modifies_inedges(false);
  engine.set_modifies_outedges(false);
  engine.set_disable_vertexdata_storage();
  pengine = &engine;
  engine.run(program, 1);

  m.set("latent_dimension", (int)D);

  /* Output latent factor matrices in matrix-market format */
  vid_t numvertices = engine.num_vertices();
  assert(numvertices == max_right_vertex + 1); // Sanity check
  output_knn_result(training, numvertices, max_left_vertex);

  rating_stats();
  /* Report execution metrics */
  metrics_report(m);
  return 0;
}
int main(int argc,  const char *argv[]) {
	logstream(LOG_WARNING)<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
		" comments or bug reports to [email protected] " << std::endl;
	global_logger().set_log_level(LOG_INFO);
	global_logger().set_log_to_console(true);

	graphchi_init(argc, argv);
	mytimer.start();

	outdir = get_option_string("output","");
	debug = get_option_int("debug", 0);
	dir = get_option_string("file_list","");
	filename = get_option_string("training","");
	lines = get_option_int("lines", 0);
	omp_set_num_threads(get_option_int("ncpus", 1));
	tsv = get_option_int("tsv", 0); //is this tab seperated file?
	csv = get_option_int("csv", 0); // is the comma seperated file?
	binary = get_option_int("binary", 0);
	single_domain = get_option_int("single_domain", 0);
	has_header_titles = get_option_int("has_header_titles", has_header_titles);
	ignore_rest_of_line = get_option_int("ignore_rest_of_line", ignore_rest_of_line);
	mytime.start();


	string_to_tokenize = spaces;
	if (tsv)
		string_to_tokenize = tsv_spaces;
	else if (csv)
		string_to_tokenize = csv_spaces;

	if (dir != ""){
		FILE * f = fopen(dir.c_str(), "r");
		if (f == NULL)
			logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;

		while(true){
			char buf[256];
			int rc = fscanf(f, "%s\n", buf);
			if (rc < 1)
				break;
			in_files.push_back(buf);
		}
	}
	else if (filename != "")
		in_files.push_back(filename);

	if (in_files.size() == 0)
		logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl;

#pragma omp parallel for
	for (uint i=0; i< in_files.size(); i++)
		parse(i);

	std::cout << "Finished in " << mytime.current_time() << std::endl;
	M = string2nodeid.size();
	if (single_domain)
		N = M;
	else N = string2nodeid2.size();

	save_map_to_text_file(string2nodeid, outdir + ".user.map");
	if (!single_domain){
		save_map_to_text_file(string2nodeid2, outdir + ".item.map");
	}
	std::string filename = "matrix_market.info";
	if (in_files.size() == 1)
		filename = in_files[0] + ".out:info";
	logstream(LOG_INFO)<<"Writing matrix market header into file: " << filename << std::endl;
	out_file fout(filename.c_str());
	MM_typecode out_typecode;
	mm_clear_typecode(&out_typecode);
	mm_set_integer(&out_typecode); 
	mm_set_sparse(&out_typecode); 
	mm_set_matrix(&out_typecode);
	mm_write_banner(fout.outf, out_typecode);
	mm_write_mtx_crd_size(fout.outf, M, N, nnz);
	return 0;
}
Example #22
0
void Compute(graph<vertex>& GA, commandLine P) {
  t1.start();
  long start = P.getOptionLongValue("-r",0);
  if(GA.V[start].getOutDegree() == 0) { 
    cout << "starting vertex has degree 0" << endl;
    return;
  }
  const int procs = P.getOptionIntValue("-p",0);
  if(procs > 0) setWorkers(procs);
  const double t = P.getOptionDoubleValue("-t",3);
  const double epsilon = P.getOptionDoubleValue("-e",0.000000001);
  const uintE N = P.getOptionIntValue("-N",1);
  const intE n = GA.n;
  const double constant = exp(t)*epsilon/(2*(double)N);
  double* psis = newA(double,N);
  double* fact = newA(double,N);
  fact[0] = 1;
  for(long k=1;k<N;k++) fact[k] = k*fact[k-1];
  double* tm = newA(double,N);
  {parallel_for(long m=0;m<N;m++) tm[m]  = pow(t,m);}
  {parallel_for(long k=0;k<N;k++) {
    psis[k] = 0;
    for(long m=0;m<N-k;m++)
      psis[k] += fact[k]*tm[m]/(double)fact[m+k];
    }}

  sparseAdditiveSet<float> x = sparseAdditiveSet<float>(10000,1,0.0);
  sparseAdditiveSet<float> r = sparseAdditiveSet<float>(2,1,0.0);
  x.insert(make_pair(start,0.0));
  r.insert(make_pair(start,1.0));
  vertexSubset Frontier(n,start);

  long j = 0, totalPushes = 0;
  while(Frontier.numNonzeros() > 0){
    totalPushes += Frontier.numNonzeros();
    uintT* Degrees = newA(uintT,Frontier.numNonzeros());
    {parallel_for(long i=0;i<Frontier.numNonzeros();i++) Degrees[i] = GA.V[Frontier.s[i]].getOutDegree();}
    long totalDegree = sequence::plusReduce(Degrees,Frontier.numNonzeros());
    free(Degrees);
    if(j+1 < N) {
      long rCount = r.count();
      //make bigger hash table initialized to 0.0's
      sparseAdditiveSet<float> new_r = sparseAdditiveSet<float>(max(100L,min((long)n,totalDegree+rCount)),LOAD_FACTOR,0.0); 
      vertexMap(Frontier,Local_Update(x,r));
      vertexSubset output = edgeMap(GA, Frontier, HK_F<vertex>(x,r,new_r,GA.V,t/(double)(j+1)));
      r.del(); 
      r = new_r;
      if(x.m < ((uintT) 1 << log2RoundUp((uintT)(LOAD_FACTOR*min((long)n,rCount+output.numNonzeros()))))) {
	sparseAdditiveSet<float> new_x = sparseAdditiveSet<float>(LOAD_FACTOR*min((long)n,rCount+output.numNonzeros()),LOAD_FACTOR,0.0); //make bigger hash table
	new_x.copy(x);
	x.del();
	x = new_x;
      }
      output.del();

      //compute active set (faster in practice to just scan over r)
      _seq<ACLpair> vals = r.entries(activeF<vertex>(GA.V,constant/psis[j+1]));
      uintE* Active = newA(uintE,vals.n);
      parallel_for(long i=0;i<vals.n;i++) Active[i] = vals.A[i].first;
      Frontier.del(); vals.del();
      Frontier = vertexSubset(n,vals.n,Active);
      j++;
    } else { //last iteration
Example #23
0
/**
 * Entry point of the in-memory connected components tool.
 *
 * Runs ConnectedComponentsProgram for up to --niters iterations, optionally
 * writes the per-vertex labels to "<file>-components", prints run statistics
 * and a histogram of component sizes ("SITES RESULT"), then runs one more
 * iteration with edge_count set and prints the histogram built from `state`
 * ("BONDS RESULT").
 *
 * NOTE(review): the code relies on logstream(LOG_FATAL) stopping the program
 * (pfile is used right after the NULL check) -- confirm against the logger.
 *
 * @return 0 once all steps have run.
 */
int main(int argc, const char ** argv) {
  /* GraphChi initialization will read the command line
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("connected-components-inmem");

  /* Basic arguments for application */
  std::string filename = get_option_string("file");  // Base filename
  int niters           = get_option_int("niters", 100); // Number of iterations (max)
  int output_labels    = get_option_int("output_labels", 0); //output node labels to file?
  bool scheduler       = true;    // Always run with scheduler

  /* Process input file - if not already preprocessed */
  float p                 = get_option_float("p", -1);
  int n                 = get_option_int("n", -1);
  int quiet = get_option_int("quiet", 0);
  if (quiet)
    global_logger().set_log_level(LOG_ERROR);
  int nshards             = (int) convert_if_notexists<EdgeDataType>(filename, get_option_string("nshards", "auto"));
  mytimer.start();

  /* Run */
  ConnectedComponentsProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(filename, nshards, scheduler, m);
  engine.set_disable_vertexdata_storage();  
  engine.set_enable_deterministic_parallelism(false);
  engine.set_modifies_inedges(false);
  engine.set_modifies_outedges(false);
  engine.set_preload_commit(false);
  engine.set_maxwindow(engine.num_vertices());

  // started a second time here, after the engine has been set up
  mytimer.start();

  // every vertex starts out active
  active_nodes = new bool[engine.num_vertices()];
  for (size_t i=0; i< engine.num_vertices(); i++)
    active_nodes[i] = true;
  engine.run(program, niters);


  /* Run analysis of the connected components  (output is written to a file) */
  if (output_labels){
    FILE * pfile = fopen((filename + "-components").c_str(), "w");
    if (!pfile)
      logstream(LOG_FATAL)<<"Failed to open file: " << filename << "-components" << std::endl;
    fprintf(pfile, "%%%%MatrixMarket matrix array real general\n");
    // argument types are made to match the conversion specifiers exactly
    fprintf(pfile, "%lu %u\n", (unsigned long)(engine.num_vertices()-1), 1u);
    // labels are written starting from vertex 1
    for (size_t i=1; i< engine.num_vertices(); i++){
      fprintf(pfile, "%u\n", vertex_values[i]);
      assert(vertex_values[i] >= 0 && vertex_values[i] < engine.num_vertices());
    }
    fclose(pfile); 
    logstream(LOG_INFO)<<"Saved succesfully to out file: " << filename << "-components" << " time for saving: " << mytimer.current_time() << std::endl;
  } 

  std::cout<<"Total runtime: " << mytimer.current_time() << std::endl;
  if (p > 0)
    std::cout << "site fraction p= " << p << std::endl;
  if (n > 0){
    std::cout << "n=" << n*p << std::endl;
    std::cout << "isolated sites: " << p*(double)n-actual_vertices << std::endl;
  }
  std::cout << "Number of sites: " << actual_vertices << std::endl;
  std::cout << "Number of bonds: " << engine.num_edges() << std::endl;
  // n defaults to -1 when the option is absent; only a positive n gives
  // meaningful percentages (a plain `if (n)` also fired for -1)
  if (n > 0){
    std::cout << "Percentage of sites: " << (double)actual_vertices / (double)n << std::endl;
    std::cout << "Percentage of bonds: " << (double)engine.num_edges() / (2.0*n) << std::endl;
  }
  std::cout  << "Number of iterations: " << iter << std::endl;
  std::cout << "SITES RESULT:\nsize\tcount\n";
  std::map<uint,uint> final_countsv;   // component size -> number of components (vertices)
  std::map<uint,uint> final_countse;   // same histogram, built from `state`
  std::map<uint,uint> statv;           // label -> number of vertices carrying it
  for (size_t i=0; i< engine.num_vertices(); i++)
    statv[vertex_values[i]]++;


  uint total_sites = 0;
  for (std::map<uint, uint>::const_iterator it = statv.begin();
      it != statv.end(); ++it) {
    final_countsv[it->second] += 1;
    total_sites += it->second;
  }
  for (std::map<uint, uint>::const_iterator it = final_countsv.begin();
      it != final_countsv.end(); ++it) {
    std::cout << it->first << "\t" << it->second << "\n";
  }
  // one extra iteration with edge_count set; `state` is read afterwards
  edge_count = 1;
  engine.run(program, 1);
  std::cout << "BONDS RESULT:\nsize\tcount\n";
  uint total_bonds = 0;
  for (std::map<uint, uint>::const_iterator it = state.begin();
      it != state.end(); ++it) {
    final_countse[it->second] += 1;
    total_bonds += it->second;
  }
  for (std::map<uint, uint>::const_iterator it = final_countse.begin();
      it != final_countse.end(); ++it) {
    std::cout << it->first << "\t" << it->second << "\n";
  }
  // NOTE(review): `graph` is not declared in this function; presumably a
  // file-level object -- confirm, or compare against `engine` instead
  assert(total_sites == graph.num_vertices());
  assert(total_bonds == graph.num_edges());

  return 0;
}
Example #24
0
/* The main drawing function. */
void DrawGLScene(void)
{
    const_move(delta.get_ticks());
    delta.start();

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);		// Clear The Screen And The Depth Buffer
    glLoadIdentity();				// Reset The View
    glTranslatef(-(gridsize/2) +1, 0, -30);



    //glRotatef(zrot, 0.0f, 0.0f, 1.0f);

    // choose the texture to use
    glBindTexture(GL_TEXTURE_2D, texture[0]);
    //glLoadIdentity();
    for(float _xtrans_l =0; _xtrans_l < gridsize; _xtrans_l+=2)
    {
        for(float _ztrans_l =0; _ztrans_l < gridsize; _ztrans_l+=2)
        {
            //glPushMatrix();
            glDisable(GL_TEXTURE_2D);
            glEnable(GL_BLEND);
            glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
            glColor3ub(144, 213, 225);
            glLineWidth(1);
            glPushMatrix();
            // rotate triangle on y axis
            /**glRotatef(xrot, 0.0f, 1.0f, 0.0f);*/
            //glRotatef(0, 0.0f, 0.0f, 0.0f);
            glTranslatef(_xtrans_l, _ytrans, _ztrans_l);
            if(_xtrans_l != gridsize)
            {
                glBegin(GL_LINES);
                    glVertex3f(-1.0f, 0.0f, -1.0f);
                    glVertex3f(1.0f, 0.0f, -1.0f);
                glEnd();
                glBegin(GL_LINES);
                    glVertex3f(-1.0f, 0.0f, 1.0f);
                    glVertex3f(1.0f, 0.0f, 1.0f);
                glEnd();
            }

//            {
//                glBegin(GL_LINES);
//                    glVertex3f(0.0f, 0.0f, 0.0f);
//                    glVertex3f(2.0f, 0.0f, 0.0f);
//                glEnd();
//            }
            if(_ztrans_l != gridsize)
            {
                glBegin(GL_LINES);
                    glVertex3f(-1.0f, 0.0f, -1.0f);
                    glVertex3f(-1.0f, 0.0f, 1.0f);
                glEnd();
                glBegin(GL_LINES);
                    glVertex3f(1.0f, 0.0f, -1.0f);
                    glVertex3f(1.0f, 0.0f, 1.0f);
                glEnd();
            }

//            {
//                glBegin(GL_LINES);
//                    glVertex3f(0.0f, 0.0f, 0.0f);
//                    glVertex3f(0.0f, 0.0f, 2.0f);
//                glEnd();
//            }
            glPopMatrix();


        }
    }

    glPushMatrix();
    glTranslatef(_xtrans, _ytrans, _ztrans);

    if(dir_array[0])
    {
        //up
        glRotatef(90, 0, 1, 0);
    }
    else if(dir_array[1])
    {
        //right
        glRotatef(0, 0, 1, 0);
    }
    else if(dir_array[2])
    {
        //down
        glRotatef(-90, 0, 1, 0);
    }
    else if(dir_array[3])
    {
        //left
        glRotatef(180, 0, 1, 0);
    }
    //draw cube for player palceholder
    glBegin(GL_QUADS);
        //bottom colour
        glColor3ub(144, 213, 225);
        //bottom
        glVertex3f(-1.0f, 0.0f, -1.0f);
        glVertex3f(1.0f, 0.0f, -1.0f);
        glVertex3f(1.0f, 0.0f, 1.0f);
        glVertex3f(-1.0f, 0.0f, 1.0f);
        //left colour
        glColor3ub(144, 213, 225);
        //left
        glVertex3f(-1.0f, 0.0f, -1.0f);
        glVertex3f(-1.0f, 0.0f,  1.0f);
        glVertex3f(-1.0f, 2.0f, 1.0f);
        glVertex3f(-1.0f, 2.0f, -1.0f);
        //top colour
        glColor3ub(144, 213, 225);
        //top
        glVertex3f(-1.0f, 2.0f, -1.0f);
        glVertex3f(-1.0f, 2.0f, 1.0f);
        glVertex3f(1.0f, 2.0f, 1.0f);
        glVertex3f(1.0f, 2.0f, -1.0f);
        //right colour
        glColor3ub(144, 213, 225);
        //right bottom
        glVertex3f(1.0f, 0.0f, -1.0f);
        glVertex3f(2.0f, 1.0f, -1.0f);
        glVertex3f(2.0f, 1.0f, 1.0f);
        glVertex3f(1.0f, 0.0f, 1.0f);
        //right colour
        glColor3ub(144, 213, 225);
        // right top
        glVertex3f(1.0f, 2.0f, -1.0f);
        glVertex3f(2.0f, 1.0f, -1.0f);
        glVertex3f(2.0f, 1.0f, 1.0f);
        glVertex3f(1.0f, 2.0f, 1.0f);
        //front colour
        glColor3ub(144, 213, 225);
        //front
        glVertex3f(-1.0f, 0.0f, 1.0f);
        glVertex3f(1.0f, 0.0f, 1.0f);
        glVertex3f(1.0f, 2.0f, 1.0f);
        glVertex3f(-1.0f, 2.0f, 0.0f);
        //back colour
        glColor3ub(144, 213, 225);
        //back
        glVertex3f(-1.0f, 0.0f, -1.0f);
        glVertex3f(-1.0f, 2.0f, -1.0f);
        glVertex3f(1.0f, 2.0f, -1.0f);
        glVertex3f(1.0f, 0.0f, -1.0f);
    glEnd();

    glPopMatrix();
    //push mid points of top and bottom of cube into vector
    // top
//    int vertexarray[12] = {};
//    trail_vector.push_back();

    glDisable(GL_BLEND);
    glEnable(GL_TEXTURE_2D);

    //count = 0;
    float _xtrans_int = _xtrans;

    if( _xtrans_int > 2)
    {
        count ++;
        _xtrans_int = 0;
    }
    std::cout << _xtrans_int << std::endl;



    for(int i = 0; i < count; i++)
    {
        glPushMatrix();
            drawtrail(count);
        glPopMatrix();
    }


    // since this is double buffered, swap the buffers to display what just got drawn.
    glutSwapBuffers();

}