/**
  * Hook invoked once an iteration has completed.
  *
  * Logs the current timer value, the iteration number and the `changes`
  * counter. When no changes were recorded, the given iteration is registered
  * as the last one through ginfo.set_last_iteration(). Afterwards the
  * `changes` counter is reset and `iter` is advanced.
  */
 void after_iteration(int iteration, graphchi_context &ginfo) {
   logstream(LOG_DEBUG) << mytimer.current_time() << "iteration: " << iteration
                        << " changes: " << changes << std::endl;

   const bool nothing_changed = (changes == 0);
   if (nothing_changed) {
     ginfo.set_last_iteration(iteration);
   }

   // prepare the counters for the next round
   ++iter;
   changes = 0;
 }
Example #2
0
/**
 * Recomputes dtraining_rmse and prints it together with the iteration number.
 *
 * The previous value is kept in last_training_rmse. The new value is the
 * square root of the sum of the `rmse` fields of the first M entries of
 * latent_factors_inmem divided by pengine->num_edges().
 * gcontext is not used here.
 */
void training_rmse(int iteration, graphchi_context &gcontext){
    // remember the value of the previous call before overwriting it
    last_training_rmse = dtraining_rmse;
    dtraining_rmse = 0;

    const int row_count = (int)M;
#pragma omp parallel for reduction(+:dtraining_rmse)
    for (int row = 0; row < row_count; row++){
      dtraining_rmse += latent_factors_inmem[row].rmse;
    }

    dtraining_rmse = sqrt(dtraining_rmse / pengine->num_edges());

    std::cout << std::setw(10) << mytimer.current_time()
              << ") Iteration: " << std::setw(3) << iteration
              << " Training RMSE: " << std::setw(10) << dtraining_rmse;
 }
Example #3
0
  /**
   *  Vertex update function - scores candidate items for one vertex and stores
   *  the ranked result in the vertex data.
   *
   *  Only vertices with id < M are processed; the loops below address the
   *  candidates as ids M .. M+N-1 of latent_factors_inmem.
   *
   *  - knn_sample_percent == 1.0: every candidate that is not already
   *    connected to the vertex by an edge is scored with als_predict().
   *  - otherwise: `howmany` candidates are drawn with ::randi(M, M+N-1) and
   *    scored (candidates already rated are not filtered out in this mode).
   *
   *  The scores and candidate indices (relative to M) are handed to
   *  reverse_sort_index2(), whose outputs are stored in vdata.ids and
   *  vdata.ratings (presumably the num_ratings best entries - confirm against
   *  the helper's definition).
   *
   *  @param vertex    vertex being updated
   *  @param gcontext  engine context (unused here)
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

  if (vertex.id() >= M)
    return;

  vertex_data & vdata = latent_factors_inmem[vertex.id()];
  int howmany = N*knn_sample_percent;
  assert(howmany > 0 );
  vec distances = vec::Zero(howmany);
  ivec indices = ivec(howmany);
  // -2 marks slots that never received a candidate
  for (int i=0; i< howmany; i++){
    indices[i]= -2;
  }
  std::vector<bool> curratings;
  curratings.resize(N);
  for(int e=0; e < vertex.num_edges(); e++) {
  //no need to calculate this rating since it is given in the training data reference
    curratings[vertex.edge(e)->vertex_id() - M] = true;
  }
   if (knn_sample_percent == 1.0){
     for (uint i=M; i< M+N; i++){
        if (curratings[i-M])
          continue;
        vertex_data & other = latent_factors_inmem[i];
        double dist;
        als_predict(vdata, other, 0, dist); 
        indices[i-M] = i-M;
        distances[i-M] = dist;
     }
  }
  else for (int i=0; i<howmany; i++){
        int random_other = ::randi(M, M+N-1);
        vertex_data & other = latent_factors_inmem[random_other];
        double dist;
        als_predict(vdata, other, 0, dist); 
        // Slot i holds the i-th sample. The stored index is the sampled
        // candidate relative to M (the previous code indexed with i-M, which
        // is out of range here because i counts samples, not vertex ids).
        indices[i] = random_other - M;
        distances[i] = dist;
   }
  
  vec out_dist(num_ratings);
  ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings);
  assert(indices_sorted.size() <= num_ratings);
  assert(out_dist.size() <= num_ratings);
  vdata.ids = indices_sorted;
  vdata.ratings = out_dist;
  if (debug)
    printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]);

  // periodic progress report
  if (vertex.id() % 1000 == 0)
    printf("Computing recommendaitons for user %d at time: %g\n", vertex.id()+1, mytimer.current_time());
  
  
  }
Example #4
0
/**
 * Entry point of the parser.
 *
 * Reads the options "debug", "file_list", "lines" and "ncpus", loads the list
 * of input file names (one whitespace-delimited name per entry) from the file
 * given by "file_list", runs parse() on every listed file (in parallel via
 * OpenMP), prints the collected counters, saves the id maps and writes a
 * matrix market size header into "mm.info".
 *
 * @return 0 on success, 1 if the file list could not be opened.
 */
int main(int argc,  const char *argv[]) {

  logstream(LOG_WARNING)<<"CE_Graph parsers library is written by Danny Bickson (c). Send any "
    " comments or bug reports to [email protected] " << std::endl;
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);

  CE_Graph_init(argc, argv);

  debug = get_option_int("debug", 0);
  dir = get_option_string("file_list");
  lines = get_option_int("lines", 0);
  omp_set_num_threads(get_option_int("ncpus", 1));
  mytime.start();

  FILE * f = fopen(dir.c_str(), "r");
  if (f == NULL){
    logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;
    return 1; // never read from a NULL stream, even if the logger does not abort
  }

  while(true){
    char buf[256];
    // the field width keeps an over-long entry from overflowing buf
    int rc = fscanf(f, "%255s\n", buf);
    if (rc < 1)
      break;
    in_files.push_back(buf);
  }
  fclose(f);

  if (in_files.size() == 0)
    logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl;

#pragma omp parallel for
  for (uint i=0; i< in_files.size(); i++)
    parse(i);

  std::cout << "Finished in " << mytime.current_time() << std::endl << "\t direct tweets found: " << links_found  <<
    " \t global tweets: " << wide_tweets << 
    "\t http links: " << http_links << 
    "\t retweets: " << retweet_found <<
    "\t total lines in input file : " << total_lines << 
    " \t invalid records (missing names) " << missing_names <<  std::endl;

  save_map_to_text_file(string2nodeid, outdir + "map.text");
  save_map_to_text_file(nodeid2hash, outdir + "reverse.map.text");
  save_map_to_text_file(tweets_per_user, outdir + "tweets_per_user.text");

  // "%%%%" prints as "%%", the matrix market banner prefix
  out_file fout("mm.info");
  fprintf(fout.outf, "%%%%MatrixMarket matrix coordinate real general\n");
  fprintf(fout.outf, "%u %u %lu\n", maxfrom+1, maxto+1, links_found);
  return 0;
}
Example #5
0
/**
 * Entry point of the parser.
 *
 * Reads the options "debug", "file_list", "lines", "ncpus", "from_val",
 * "to_val" and "mid_val" (from_val must differ from -1), loads the list of
 * input file names from the file given by "file_list", runs parse() on every
 * listed file (in parallel via OpenMP) and saves frommap.string2nodeid to
 * outdir + dir + "map.text".
 *
 * @return 0 on success, 1 if the file list could not be opened.
 */
int main(int argc,  const char *argv[]) {

	logstream(LOG_WARNING)<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
		" comments or bug reports to [email protected] " << std::endl;
	global_logger().set_log_level(LOG_INFO);
	global_logger().set_log_to_console(true);

	graphchi_init(argc, argv);

	debug = get_option_int("debug", 0);
	dir = get_option_string("file_list");
	lines = get_option_int("lines", 0);
	omp_set_num_threads(get_option_int("ncpus", 1));
	from_val = get_option_int("from_val", from_val);
	to_val = get_option_int("to_val", to_val);
	mid_val = get_option_int("mid_val", mid_val);
	if (from_val == -1)
		logstream(LOG_FATAL)<<"Must set from/to " << std::endl;
	mytime.start();

	FILE * f = fopen(dir.c_str(), "r");
	if (f == NULL){
		logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;
		return 1; // never read from a NULL stream, even if the logger does not abort
	}

	while(true){
		char buf[256];
		// the field width keeps an over-long entry from overflowing buf
		int rc = fscanf(f, "%255s\n", buf);
		if (rc < 1)
			break;
		in_files.push_back(buf);
	}
	fclose(f);

	if (in_files.size() == 0)
		logstream(LOG_FATAL)<<"Failed to read any file frommap from the list file: " << dir << std::endl;

#pragma omp parallel for
	for (int i=0; i< (int)in_files.size(); i++)
		parse(i);

	std::cout << "Finished in " << mytime.current_time() << std::endl;

	save_map_to_text_file(frommap.string2nodeid, outdir + dir + "map.text");
	return 0;
}
Example #6
0
  /**
   *  Vertex update function.
   *
   *  Even iterations: a vertex that adjcontainer reports as a pivot has its
   *  edges loaded into memory.
   *  Odd iterations: the vertex is compared against every in-memory pivot
   *  with a smaller id that is connected to it; each non-zero distance is
   *  appended to the output file of the calling OpenMP thread as
   *  "[item A] [item B] [distance]" (ids written 1-based).
   */
  void update(CE_Graph_vertex<VertexDataType, EdgeDataType> &v, CE_Graph_context &gcontext) {
    if (debug)
      printf("Entered iteration %d with %d - edges %d\n", gcontext.iteration, v.id(), v.num_edges());

    const bool loading_phase = (gcontext.iteration % 2 == 0);

    if (loading_phase) {
      /* 1) load a subset of items into memory (pivots)
       * 2) Find which subset of items needs to compared to the users
       */
      if (!adjcontainer->is_pivot(v.id()))
        return;
      adjcontainer->load_edges_into_memory(v);
      if (debug)
        printf("Loading pivot %d intro memory\n", v.id());
      return;
    }

    // comparison phase (iteration % 2 == 1)
    for (vid_t pivot = adjcontainer->pivot_st; pivot < adjcontainer->pivot_en; pivot++){
      // the metric is symmetric, so only pivots with a smaller id are compared
      if (pivot >= v.id())
        continue;

      dense_adj &pivot_edges = adjcontainer->adjs[pivot - adjcontainer->pivot_st];
      // skip pivots that are not connected to this item
      if (get_val(pivot_edges.edges, v.id()) == 0)
        continue;

      double dist = adjcontainer->calc_distance(v, pivot, distance_metric);
      item_pairs_compared++;
      if (item_pairs_compared % 1000000 == 0)
        logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl;
      if (debug)
        printf("comparing %d to pivot %d distance is %lg\n", pivot+ 1, v.id() + 1, dist);

      if (dist == 0)
        continue;

      // output format: [item A] [ item B ] [ distance ]
      fprintf(out_files[omp_get_thread_num()], "%u %u %.12lg\n", v.id()+1, pivot+1, (double)dist);
      written_pairs++;
    }
  }//end of update function
Example #7
0
/**
 * Entry point of the parser.
 *
 * Reads the options "debug", "file_list", "lines" and "ncpus", loads the list
 * of input file names from the file given by "file_list", runs parse() on
 * every listed file sequentially (the OpenMP pragma is disabled) and prints
 * the total line count together with maxfrom and maxto.
 *
 * @return 0 on success, 1 if the file list could not be opened.
 */
int main(int argc,  const char *argv[]) {

  Rcpp::Rcout<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
    " comments or bug reports to [email protected] " << std::endl;
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);

  graphchi_init(argc, argv);

  debug = get_option_int("debug", 0);
  dir = get_option_string("file_list");
  lines = get_option_int("lines", 0);
  omp_set_num_threads(get_option_int("ncpus", 1));
  mytime.start();

  FILE * f = fopen(dir.c_str(), "r");
  if (f == NULL){
    logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;
    return 1; // never read from a NULL stream, even if the logger does not abort
  }

  while(true){
    char buf[256];
    // the field width keeps an over-long entry from overflowing buf
    int rc = fscanf(f, "%255s\n", buf);
    if (rc < 1)
      break;
    in_files.push_back(buf);
  }
  fclose(f);

  if (in_files.size() == 0)
    logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl;

//#pragma omp parallel for
  for (uint i=0; i< in_files.size(); i++)
    parse(i);

  std::cout << "Finished in " << mytime.current_time() << std::endl << 
    "\t total lines in input file : " << total_lines <<  "\t max from: " << maxfrom << "\t max to: " <<maxto << std::endl;

  return 0;
}
    /**
     *  Vertex update function.
     *
     *  Even iterations: user vertices that adjcontainer reports as pivots
     *  have their edges loaded into memory.
     *  Odd iterations: the vertex must be an item; for each of its edges whose
     *  other endpoint is a pivot, adjcontainer->compute_ratings() is called
     *  with that endpoint and the edge's up_weight.
     */
    void update(graphchi_vertex<VertexDataType, edge_data> &v, graphchi_context &gcontext) {
        if (debug)
            printf("Entered iteration %d with %d\n", gcontext.iteration, is_item(v.id()) ? (v.id() - M + 1): v.id());

        const bool loading_phase = (gcontext.iteration % 2 == 0);

        if (loading_phase) {
            /* 1) load a subset of users into memory (pivots)
             * 2) Find which subset of items is connected to the users
             */
            if (adjcontainer->is_pivot(v.id()) && is_user(v.id())) {
                adjcontainer->load_edges_into_memory(v);
                if (debug)
                    printf("Loading pivot %d intro memory\n", v.id());
            }
            return;
        }

        /* odd iteration number: handle every edge that leads to a pivot */
        assert(is_item(v.id()));

        for (int e = 0; e < v.num_edges(); e++) {
            const bool neighbor_is_pivot = adjcontainer->is_pivot(v.edge(e)->vertex_id());
            if (neighbor_is_pivot) {
                if (debug)
                    printf("comparing user pivot %d to item %d\n", v.edge(e)->vertex_id()+1 , v.id() - M + 1);

                adjcontainer->compute_ratings(v, v.edge(e)->vertex_id(), v.edge(e)->get_data().up_weight);
                item_pairs_compared++;

                // periodic progress report
                if (item_pairs_compared % 1000000 == 0)
                    Rcpp::Rcout<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl;
            }
        }
    }//end of update function
void parse(int i){    
	in_file fin(in_files[i]);
	out_file fout((outdir + ".out"));

	size_t linesize = 0;
	char * saveptr = NULL, * linebuf = NULL;
	size_t line = 1;
	uint from,to;
	bool matrix_market = false;

	while(true){
		int rc = getline(&linebuf, &linesize, fin.outf);
		if (rc < 1)
			break;
		if (strlen(linebuf) <= 1){ //skip empty lines
			line++;
			continue;
		}

		if (has_header_titles && line == 1){
			line++;
			continue;
		} 
		//skipping over matrix market header (if any) 
		if (!strncmp(linebuf, "%%MatrixMarket", 14)){
			matrix_market = true;
			continue;
		}
		if (matrix_market && linebuf[0] == '%'){
			continue;
		}
		if (matrix_market && linebuf[0] != '%'){
			matrix_market = false;
			continue;
		}

		//read [FROM]
		char *pch = strtok_r(linebuf,string_to_tokenize, &saveptr);
		if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf << "]" << std::endl; return; }
		assign_id(string2nodeid, from, pch, true);

		//read [TO]
		pch = strtok_r(NULL,string_to_tokenize, &saveptr);
		if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf << "]" << std::endl; return; }
		assign_id(single_domain ? string2nodeid:string2nodeid2, to, pch, single_domain ? true : false);

		//read the rest of the line
		if (!binary){
			if (ignore_rest_of_line)
				pch = strtok_r(NULL, string_to_tokenize, &saveptr);
			else
				pch = strtok_r(NULL, "\n", &saveptr);
			if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf << "]" << std::endl; return; }
		}
		if (tsv)
			fprintf(fout.outf, "%u\t%u\t%s\n", from, to, binary? "": pch);
		else if (csv)
			fprintf(fout.outf, "%u %u %s\n", from, to, binary? "" : pch);
		else 
			fprintf(fout.outf, "%u %u %s\n", from, to, binary? "" : pch);
		nnz++;

		line++;
		total_lines++;
		if (lines && line>=lines)
			break;

		if (debug && (line % 50000 == 0))
			logstream(LOG_INFO) << mytimer.current_time() << ") Parsed line: " << line << " map size is: " << string2nodeid.size() << std::endl;
		if (string2nodeid.size() % 500000 == 0)
			logstream(LOG_INFO) << mytimer.current_time() << ") Hash map size: " << string2nodeid.size() << " at time: " << mytime.current_time() << " edges: " << total_lines << std::endl;
	} 

	logstream(LOG_INFO) <<"Finished parsing total of " << line << " lines in file " << in_files[i] << endl <<
		"total map size: " << string2nodeid.size() << endl;

}
 /// \cond GRAPHLAB_INTERNAL
 inline double get_current_time() const {
   // forward the reading of the member timer `ti`
   const double now = ti.current_time();
   return now;
 }
int main(int argc,  const char *argv[]) {
	logstream(LOG_WARNING)<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
		" comments or bug reports to [email protected] " << std::endl;
	global_logger().set_log_level(LOG_INFO);
	global_logger().set_log_to_console(true);

	graphchi_init(argc, argv);
	mytimer.start();

	outdir = get_option_string("output","");
	debug = get_option_int("debug", 0);
	dir = get_option_string("file_list","");
	filename = get_option_string("training","");
	lines = get_option_int("lines", 0);
	omp_set_num_threads(get_option_int("ncpus", 1));
	tsv = get_option_int("tsv", 0); //is this tab seperated file?
	csv = get_option_int("csv", 0); // is the comma seperated file?
	binary = get_option_int("binary", 0);
	single_domain = get_option_int("single_domain", 0);
	has_header_titles = get_option_int("has_header_titles", has_header_titles);
	ignore_rest_of_line = get_option_int("ignore_rest_of_line", ignore_rest_of_line);
	mytime.start();


	string_to_tokenize = spaces;
	if (tsv)
		string_to_tokenize = tsv_spaces;
	else if (csv)
		string_to_tokenize = csv_spaces;

	if (dir != ""){
		FILE * f = fopen(dir.c_str(), "r");
		if (f == NULL)
			logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;

		while(true){
			char buf[256];
			int rc = fscanf(f, "%s\n", buf);
			if (rc < 1)
				break;
			in_files.push_back(buf);
		}
	}
	else if (filename != "")
		in_files.push_back(filename);

	if (in_files.size() == 0)
		logstream(LOG_FATAL)<<"Failed to read any file names from the list file: " << dir << std::endl;

#pragma omp parallel for
	for (uint i=0; i< in_files.size(); i++)
		parse(i);

	std::cout << "Finished in " << mytime.current_time() << std::endl;
	M = string2nodeid.size();
	if (single_domain)
		N = M;
	else N = string2nodeid2.size();

	save_map_to_text_file(string2nodeid, outdir + ".user.map");
	if (!single_domain){
		save_map_to_text_file(string2nodeid2, outdir + ".item.map");
	}
	std::string filename = "matrix_market.info";
	if (in_files.size() == 1)
		filename = in_files[0] + ".out:info";
	logstream(LOG_INFO)<<"Writing matrix market header into file: " << filename << std::endl;
	out_file fout(filename.c_str());
	MM_typecode out_typecode;
	mm_clear_typecode(&out_typecode);
	mm_set_integer(&out_typecode); 
	mm_set_sparse(&out_typecode); 
	mm_set_matrix(&out_typecode);
	mm_write_banner(fout.outf, out_typecode);
	mm_write_mtx_crd_size(fout.outf, M, N, nnz);
	return 0;
}
Example #12
0
void parse(int i){    
  in_file fin(in_files[i]);
  out_file fout((outdir + in_files[i] + ".out"));

  size_t linesize = 0;
  char * saveptr = NULL, * linebuf = NULL, buf1[256], linebuf_debug[1024];
  size_t line = 1;
  uint id;
  long int ptime;
  bool ok;
  bool first = true;

  while(true){
    int rc = getline(&linebuf, &linesize, fin.outf);
    strncpy(linebuf_debug, linebuf, 1024);
    total_lines++;
    if (rc < 1)
      break;
    if (strlen(linebuf) <= 1) //skip empty lines
      continue; 
    if (first){ first = false; continue; } //skip first line

    char *pch = strtok_r(linebuf," \r\n\t:/-", &saveptr);
    if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf << "]" << std::endl; return; }

    switch(*pch){
      case 'T':
        ok = convert_string_to_time(linebuf_debug, total_lines, i, saveptr, ptime);
        if (!ok)
          return;
        break;

      case 'U':
        ok = extract_user_name(linebuf_debug, total_lines, i, saveptr, buf1);
        if (ok)
          assign_id(id, buf1, line, in_files[i]);
        tweets_per_user[id]++;
        break;

      case 'W':
        ok = parse_links(linebuf_debug, total_lines, i, saveptr, id, ptime, fout.outf);
        if (debug && line < 20)
          printf("Found user: %s id %u time %ld\n", buf1, id, ptime);
        if (!ok)
          wide_tweets++;
        break;

      default:
        logstream(LOG_ERROR)<<"Error: expecting with T U or W first character" << std::endl;
        return;

    }

    line++;
    if (lines && line>=lines)
      break;

    if (debug && (line % 50000 == 0))
      logstream(LOG_INFO) << "Parsed line: " << line << " map size is: " << string2nodeid.size() << std::endl;
    if (string2nodeid.size() % 500000 == 0)
      logstream(LOG_INFO) << "Hash map size: " << string2nodeid.size() << " at time: " << mytime.current_time() << " edges: " << total_lines << std::endl;
  } 

  logstream(LOG_INFO) <<"Finished parsing total of " << line << " lines in file " << in_files[i] << endl <<
    "total map size: " << string2nodeid.size() << endl;

}
Example #13
0
  /**
   *  Vertex update function - scores candidate items for one user vertex with
   *  the predictor selected by `algo` (SVDPP, BIASSGD or RBM) and stores the
   *  output of reverse_sort_index2() in the vertex data (ids and ratings).
   *
   *  With knn_sample_percent == 1.0 every item not already connected to the
   *  user is scored; otherwise (int)(N*knn_sample_percent) items are drawn
   *  with ::randi(M, M+N-1).
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    // compute only for user nodes inside [start_user, min(M, end_user))
    const bool in_user_range = vertex.id() < std::min(M,(uint)end_user) && vertex.id() >= (uint)start_user;
    if (!in_user_range)
      return;

    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    int num_candidates = (int)(N*knn_sample_percent);
    assert(num_candidates > 0 );

    // shared counter, guarded by mymutex
    if (vertex.num_outedges() == 0){
       mymutex.lock();
       users_without_ratings++;
       mymutex.unlock();
    }

    vec distances = zeros(num_candidates);
    ivec indices = ivec::Zero(num_candidates);
    // -1 marks slots that never received a candidate
    for (int slot=0; slot< num_candidates; slot++){
      indices[slot]= -1;
    }

    // flags the items this user is already connected to
    std::vector<bool> curratings(N);
    for(int edge_idx=0; edge_idx < vertex.num_edges(); edge_idx++) {
      //no need to calculate this rating since it is given in the training data reference
      assert(vertex.edge(edge_idx)->vertex_id() - M >= 0 && vertex.edge(edge_idx)->vertex_id() - M < N);
      curratings[vertex.edge(edge_idx)->vertex_id() - M] = true;
    }

    if (knn_sample_percent == 1.0){
      // exhaustive mode: score every item that is not flagged
      for (uint item=M; item< M+N; item++){
        if (!curratings[item-M]){
          vertex_data & other = latent_factors_inmem[item];
          double dist;
          if (algo == SVDPP) {
            svdpp_predict(vdata, other, 0, dist); 
          }
          else if (algo == BIASSGD) {
            biassgd_predict(vdata, other, 0, dist);
          }
          else if (algo == RBM) {
            rbm_predict(vdata, other, 0, dist);
          }
          else {
            assert(false);
          }
          indices[item-M] = item-M;
          distances[item-M] = dist + 1e-10;
        }
      }
    }
    else {
      // sampling mode: score randomly drawn items
      for (int draw=0; draw<num_candidates; draw++){
        int random_other = ::randi(M, M+N-1);
        vertex_data & other = latent_factors_inmem[random_other];
        double dist;
        if (algo == SVDPP) {
          svdpp_predict(vdata, other, 0, dist); 
        }
        else if (algo == BIASSGD) {
          biassgd_predict(vdata, other, 0, dist);
        }
        else if (algo == RBM) {
          rbm_predict(vdata, other, 0, dist);
        }
        else {
          assert(false);
        }

        indices[draw] = random_other-M;
        distances[draw] = dist;
      }
    }

    vec out_dist(num_ratings);
    ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings);
    assert(indices_sorted.size() <= num_ratings);
    assert(out_dist.size() <= num_ratings);
    vdata.ids = indices_sorted;
    vdata.ratings = out_dist;
    if (debug)
      printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]);

    // periodic progress report
    if (vertex.id() % 1000 == 0)
      printf("Computing recommendations for user %d at time: %g\n", vertex.id()+1, mytimer.current_time());
  }
Example #14
0
/**
 * Iterative singular value computation on the matrix described by `info`.
 *
 * Each pass of the outer loop alternately multiplies by A and its transpose,
 * orthogonalizing the new vectors (orthogonalize_vs_all) and collecting the
 * coefficients in alpha / beta. A small matrix T is then built from alpha
 * (diagonal) and beta (superdiagonal), decomposed with svd(), and the result
 * is used to estimate errors, count newly converged values and compute the
 * vector for the next pass. The loop ends when nsv values have converged or
 * max_iter passes were made. Afterwards the residual of every converged
 * triplet is printed and, if save_vectors is set, the U and V vectors are
 * written to files named "<training>.U.<i>" / "<training>.V.<i>".
 *
 * @param info     descriptor of the input matrix
 * @param mytimer  timer used for the progress messages
 * @param errest   output: resized to nv and filled with the error estimates
 * @param vecfile  when empty, the start vector v_0 is drawn at random
 * @return the vector sigma (length data_size) holding the computed values
 *
 * NOTE(review): reads and writes file-level state that is not declared here
 * (nsv, nv, max_iter, tol, finished, data_size, save_vectors, training).
 */
vec lanczos( bipartite_graph_descriptor & info, timer & mytimer, vec & errest, 
            const std::string & vecfile){
   

   int nconv = 0;   // number of converged values so far
   int its = 1;     // pass counter
   DistMat A(info);
   DistSlicedMat U(info.is_square() ? data_size : 0, info.is_square() ? 2*data_size : data_size, true, info, "U");
   DistSlicedMat V(0, data_size, false, info, "V");
   vec alpha, beta, b;
   vec sigma = zeros(data_size);
   errest = zeros(nv);
   DistVec v_0(info, 0, false, "v_0");
   if (vecfile.size() == 0)
     v_0 = randu(size(A,2));
   PRINT_VEC2("svd->V", v_0);
   
   // normalize the start vector
   DistDouble vnorm = norm(v_0);
   v_0=v_0/vnorm;
   PRINT_INT(nv);

   while(nconv < nsv && its < max_iter){
     std::cout<<"Starting iteration: " << its << " at time: " << mytimer.current_time() << std::endl;
     int k = nconv;
     int n = nv;
     PRINT_INT(k);
     PRINT_INT(n);

     alpha = zeros(n);
     beta = zeros(n);

     // first step of this pass; orthogonalize_vs_all() receives alpha(0)
     U[k] = V[k]*A._transpose();
     orthogonalize_vs_all(U, k, alpha(0));
     //alpha(0)=norm(U[k]).toDouble(); 
     PRINT_VEC3("alpha", alpha, 0);
     //U[k] = U[k]/alpha(0);

     // remaining steps: alternate between V (times A) and U (times A')
     for (int i=k+1; i<n; i++){
       std::cout <<"Starting step: " << i << " at time: " << mytimer.current_time() <<  std::endl;
       PRINT_INT(i);

       V[i]=U[i-1]*A;
       orthogonalize_vs_all(V, i, beta(i-k-1));
      
       //beta(i-k-1)=norm(V[i]).toDouble();
       //V[i] = V[i]/beta(i-k-1);
       PRINT_VEC3("beta", beta, i-k-1); 
      
       U[i] = V[i]*A._transpose();
       orthogonalize_vs_all(U, i, alpha(i-k));
       //alpha(i-k)=norm(U[i]).toDouble();

       //U[i] = U[i]/alpha(i-k);
       PRINT_VEC3("alpha", alpha, i-k);
     }

     V[n]= U[n-1]*A;
     orthogonalize_vs_all(V, n, beta(n-k-1));
     //beta(n-k-1)=norm(V[n]).toDouble();
     PRINT_VEC3("beta", beta, n-k-1);

  //compute svd of bidiagonal matrix
  PRINT_INT(nv);
  PRINT_NAMED_INT("svd->nconv", nconv);
  n = nv - nconv;
  PRINT_INT(n);
  alpha.conservativeResize(n);
  beta.conservativeResize(n);

  PRINT_MAT2("Q",eye(n));
  PRINT_MAT2("PT",eye(n));
  PRINT_VEC2("alpha",alpha);
  PRINT_VEC2("beta",beta);
 
  // T: alpha on the diagonal, beta on the first superdiagonal
  mat T=diag(alpha);
  for (int i=0; i<n-1; i++)
    set_val(T, i, i+1, beta(i));
  PRINT_MAT2("T", T);
  mat a,PT;
  svd(T, a, PT, b);
  PRINT_MAT2("Q", a);
  alpha=b.transpose();
  PRINT_MAT2("alpha", alpha);
  // all but the last beta entry are cleared; beta(n-1) is used below
  for (int t=0; t< n-1; t++)
     beta(t) = 0;
  PRINT_VEC2("beta",beta);
  PRINT_MAT2("PT", PT.transpose());

  //estimate the error
  int kk = 0;   // number of values that converged in this pass
  for (int i=nconv; i < nv; i++){
    int j = i-nconv;
    PRINT_INT(j);
    sigma(i) = alpha(j);
    PRINT_NAMED_DBL("svd->sigma[i]", sigma(i));
    PRINT_NAMED_DBL("Q[j*n+n-1]",a(n-1,j));
    PRINT_NAMED_DBL("beta[n-1]",beta(n-1));
    // NOTE(review): relies on a floating-point overload of abs being selected here - confirm
    errest(i) = abs(a(n-1,j)*beta(n-1));
    PRINT_NAMED_DBL("svd->errest[i]", errest(i));
    // make the estimate relative when the value is above tol
    if (alpha(j) >  tol){
      errest(i) = errest(i) / alpha(j);
      PRINT_NAMED_DBL("svd->errest[i]", errest(i));
    }
    if (errest(i) < tol){
      kk = kk+1;
      PRINT_NAMED_INT("k",kk);
    }


    if (nconv +kk >= nsv){
      printf("set status to tol\n");
      finished = true;
    }
  }//end for
  PRINT_NAMED_INT("k",kk);


  // vector for the next pass: columns of V weighted by column kk of PT
  vec v;
  if (!finished){
    vec swork=get_col(PT,kk); 
    PRINT_MAT2("swork", swork);
    v = zeros(size(A,1));
    for (int ttt=nconv; ttt < nconv+n; ttt++){
      v = v+swork(ttt-nconv)*(V[ttt].to_vec());
    }
    PRINT_VEC2("svd->V",V[nconv]);
    PRINT_VEC2("v[0]",v); 
  }


   //compute the ritz eigenvectors of the converged singular triplets
  if (kk > 0){
    PRINT_VEC2("svd->V", V[nconv]);
    mat tmp= V.get_cols(nconv,nconv+n)*PT;
    V.set_cols(nconv, nconv+kk, get_cols(tmp, 0, kk));
    PRINT_VEC2("svd->V", V[nconv]);
    PRINT_VEC2("svd->U", U[nconv]);
    tmp= U.get_cols(nconv, nconv+n)*a;
    U.set_cols(nconv, nconv+kk,get_cols(tmp,0,kk));
    PRINT_VEC2("svd->U", U[nconv]);
  }

  nconv=nconv+kk;
  if (finished)
    break;

  // continue the next pass from the vector computed above
  V[nconv]=v;
  PRINT_VEC2("svd->V", V[nconv]);
  PRINT_NAMED_INT("svd->nconv", nconv);

  its++;
  PRINT_NAMED_INT("svd->its", its);
  PRINT_NAMED_INT("svd->nconv", nconv);
  //nv = min(nconv+mpd, N);
  //if (nsv < 10)
  //  nv = 10;
  PRINT_NAMED_INT("nv",nv);

} // end(while)

printf(" Number of computed signular values %d",nconv);
printf("\n");
  // report the residual norms of every converged triplet
  DistVec normret(info, nconv, false, "normret");
  DistVec normret_tranpose(info, nconv, true, "normret_tranpose");
  for (int i=0; i < nconv; i++){
    normret = V[i]*A._transpose() -U[i]*sigma(i);
    double n1 = norm(normret).toDouble();
    PRINT_DBL(n1);
    normret_tranpose = U[i]*A -V[i]*sigma(i);
    double n2 = norm(normret_tranpose).toDouble();
    PRINT_DBL(n2);
    double err=sqrt(n1*n1+n2*n2);
    PRINT_DBL(err);
    PRINT_DBL(tol);
    if (sigma(i)>tol){
      err = err/sigma(i);
    }
    PRINT_DBL(err);
    PRINT_DBL(sigma(i));
    printf("Singular value %d \t%13.6g\tError estimate: %13.6g\n", i, sigma(i),err);
  }

  if (save_vectors){
     std::cout<<"Going to save output vectors U and V" << std::endl;
     if (nconv == 0)
       logstream(LOG_FATAL)<<"No converged vectors. Aborting the save operation" << std::endl;
     // NOTE(review): sprintf into a 256-byte buffer; a long `training` path could overflow it
     char output_filename[256];
     for (int i=0; i< nconv; i++){
        sprintf(output_filename, "%s.U.%d", training.c_str(), i);
        write_output_vector(output_filename, U[i].to_vec(), false, "GraphLab v2 SVD output. This file contains eigenvector number i of the matrix U");
        sprintf(output_filename, "%s.V.%d", training.c_str(), i);
        write_output_vector(output_filename, V[i].to_vec(), false, "GraphLab v2 SVD output. This file contains eigenvector number i of the matrix V'");
     }
  }
  return sigma;
}
Example #15
0
void parse(int i){    
  in_file fin(in_files[i]);
  out_file fout((outdir + in_files[i] + ".out"));

  size_t linesize = 0;
  char * saveptr = NULL, * linebuf = NULL;
  size_t line = 1;
  uint id;

  while(true){
    std::map<uint,uint> wordcount;
    int rc = getline(&linebuf, &linesize, fin.outf);
    if (rc < 1)
      break;
    if (strlen(linebuf) <= 1) //skip empty lines
      continue; 

    char *pch = strtok_r(linebuf, spaces, &saveptr);
    if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf << "]" << std::endl; return; }
    assign_id(frommap, id, pch);
    wordcount[id]+= 1;

    while(pch != NULL){
      pch = strtok_r(NULL, spaces ,&saveptr);
      if (pch != NULL && strlen(pch) > 1){ 
        assign_id(frommap, id, pch);
        wordcount[id]+= 1;
      }
    }  

    total_lines++;

    std::map<uint,uint>::const_iterator it;
    for (it = wordcount.begin(); it != wordcount.end(); it++){
      if ((int)it->second >= min_threshold && (int)it->second <= max_threshold)
        fprintf(fout.outf, "%lu %u %u\n", line, it->first, it->second);
    }

    line++;
    if (lines && line>=lines)
      break;

    if (debug && (line % 50000 == 0))
      logstream(LOG_INFO) << "Parsed line: " << line << " map size is: " << frommap.string2nodeid.size() << std::endl;
    if (frommap.string2nodeid.size() % 500000 == 0)
      logstream(LOG_INFO) << "Hash map size: " << frommap.string2nodeid.size() << " at time: " << mytime.current_time() << " edges: " << total_lines << std::endl;
  } 

  logstream(LOG_INFO) <<"Finished parsing total of " << line << " lines in file " << in_files[i] << endl <<
    "total map size: " << frommap.string2nodeid.size() << endl;

}
Example #16
0
/**
 * Entry point: computes the mutual information between two columns.
 *
 * Reads the options "debug", "file_list", "lines", "ncpus", "from_val" and
 * "to_val", loads the list of input files, runs parse() on each of them (in
 * parallel via OpenMP) and then combines the counters collected by parse():
 * p_x / p_y hold per-value counts, frommap.string2nodeid holds the count of
 * every "<x>_<y>" pair, and n is the number of examples. The entropy h of x
 * and the mutual information mi are derived from those counts; mi is printed
 * when mi/h exceeds 1e-3, "-" otherwise. Finally the pair map is saved to
 * outdir + dir + "map.text".
 *
 * @return 0 on success, 1 if the file list could not be opened.
 */
int main(int argc,  const char *argv[]) {

	Rcpp::Rcout<<"GraphChi parsers library is written by Danny Bickson (c). Send any "
		" comments or bug reports to [email protected] " << std::endl;
	global_logger().set_log_level(LOG_INFO);
	global_logger().set_log_to_console(true);

	graphchi_init(argc, argv);

	debug = get_option_int("debug", 0);
	dir = get_option_string("file_list");
	lines = get_option_int("lines", 0);
	omp_set_num_threads(get_option_int("ncpus", 1));
	from_val = get_option_int("from_val", from_val);
	to_val = get_option_int("to_val", to_val);
	if (from_val == -1)
		logstream(LOG_FATAL)<<"Must set from/to " << std::endl;
	mytime.start();

	FILE * f = fopen(dir.c_str(), "r");
	if (f == NULL){
		logstream(LOG_FATAL)<<"Failed to open file list!"<<std::endl;
		return 1; // never read from a NULL stream, even if the logger does not abort
	}

	while(true){
		char buf[256];
		// the field width keeps an over-long entry from overflowing buf
		int rc = fscanf(f, "%255s\n", buf);
		if (rc < 1)
			break;
		in_files.push_back(buf);
	}
	fclose(f);

	if (in_files.size() == 0)
		logstream(LOG_FATAL)<<"Failed to read any file frommap from the list file: " << dir << std::endl;

#pragma omp parallel for
	for (int i=0; i< (int)in_files.size(); i++)
		parse(i);

	std::cout << "Finished in " << mytime.current_time() << std::endl;

	// n is used as a divisor everywhere below, so validate it first
	assert(n != 0);

	int total_x =0 , total_y = 0;
	std::map<std::string, int>::iterator it;
	double h = 0;   // entropy of x
	for (it = p_x.begin(); it != p_x.end(); it++){
		total_x+= it->second;
		h-= (it->second / (double)n)*log2(it->second / (double)n);
	}
	for (it = p_y.begin(); it != p_y.end(); it++)
		total_y+= it->second;
	assert(total_x == n);
	assert(total_y == n);


	double mi = 0;
	std::map<std::string, uint>::iterator iter;

	int total_p_xy = 0;
	for (iter = frommap.string2nodeid.begin() ; iter != frommap.string2nodeid.end(); iter++){
		double p_xy = iter->second / (double)n;
		assert(p_xy > 0);
		// split the "<x>_<y>" key; strncpy does not terminate the copy when
		// the key fills the buffer, so terminate it explicitly for strtok
		char buf[256];
		strncpy(buf, iter->first.c_str(), sizeof(buf) - 1);
		buf[sizeof(buf) - 1] = '\0';
		char * first = strtok(buf, "_");
		char * second = strtok(NULL, "\n\r ");
		assert(first && second);
		double px = p_x[first] / (double)n;
		double py = p_y[second] / (double)n;
		assert(px > 0 && py > 0);
		mi += p_xy * log2(p_xy / (px * py));
		total_p_xy += iter->second;
	}
	assert(total_p_xy == n);
	logstream(LOG_INFO)<<"Total examples: " <<n << std::endl;

	logstream(LOG_INFO)<<"Unique p(x) " << p_x.size() << std::endl;
	logstream(LOG_INFO)<<"Unique p(y) " << p_y.size() << std::endl;
	logstream(LOG_INFO)<<"Average F(x) " << total_x / (double)p_x.size() << std::endl;
	logstream(LOG_INFO)<<"Average F(y) " << total_y / (double)p_y.size() << std::endl;

	std::cout<<"Mutual information of " << from_val << " [" << header_titles[from_val-1] << "] <-> " << to_val << " [" << header_titles[to_val-1] << "] is: " ;
	// when h is 0 the ratio is not a finite number, the comparison is false
	// and "-" is printed
	if (mi/h > 1e-3) 
		std::cout<<std::setprecision(3) << mi << std::endl;
	else std::cout<<"-"<<std::endl;
	save_map_to_text_file(frommap.string2nodeid, outdir + dir + "map.text");
	logstream(LOG_INFO)<<"Saving map file " << outdir << dir << "map.text" << std::endl;
	return 0;
}
Example #17
0
/**
 * Entry point of the in-memory connected-components application.
 *
 * Runs ConnectedComponentsProgram on the input graph, optionally writes the
 * per-vertex labels to "<file>-components", and prints two "size / count"
 * histograms: one built from vertex_values ("SITES") and one built from the
 * global `state` map after an extra single-iteration run ("BONDS").
 *
 * Command-line options read here:
 *   file           base filename of the input graph
 *   niters         maximum number of iterations (default 100)
 *   output_labels  non-zero: write the labels file (default 0)
 *   p, n           only used in the printed summary (default -1, i.e. not given)
 *   quiet          non-zero: set the log level to LOG_ERROR (default 0)
 *   nshards        passed to convert_if_notexists (default "auto")
 *
 * @return 0 on success, 1 when the labels file cannot be opened.
 */
int main(int argc, const char ** argv) {
  /* GraphChi initialization will read the command line
     arguments and the configuration file. */
  graphchi_init(argc, argv);

  /* Metrics object for keeping track of performance counters
     and other information. Currently required. */
  metrics m("connected-components-inmem");

  /* Basic arguments for application */
  std::string filename = get_option_string("file");  // Base filename
  int niters           = get_option_int("niters", 100); // Number of iterations (max)
  int output_labels    = get_option_int("output_labels", 0); //output node labels to file?
  bool scheduler       = true;    // Always run with scheduler

  /* Optional values, used only by the summary printed at the end.
     Both default to -1, meaning "not supplied". */
  float p                 = get_option_float("p", -1);
  int n                 = get_option_int("n", -1);
  int quiet = get_option_int("quiet", 0);
  if (quiet)
    global_logger().set_log_level(LOG_ERROR);

  /* Process input file - if not already preprocessed */
  int nshards             = (int) convert_if_notexists<EdgeDataType>(filename, get_option_string("nshards", "auto"));
  mytimer.start();

  /* Run */
  ConnectedComponentsProgram program;
  graphchi_engine<VertexDataType, EdgeDataType> engine(filename, nshards, scheduler, m);
  engine.set_disable_vertexdata_storage();  
  engine.set_enable_deterministic_parallelism(false);
  engine.set_modifies_inedges(false);
  engine.set_modifies_outedges(false);
  engine.set_preload_commit(false);
  engine.set_maxwindow(engine.num_vertices());

  // NOTE(review): second start() call; presumably this resets the timer so the
  // times reported below exclude engine construction - confirm.
  mytimer.start();

  // Every vertex starts out active. The array is not released in this function.
  active_nodes = new bool[engine.num_vertices()];
  for (int i=0; i< engine.num_vertices(); i++)
    active_nodes[i] = true;
  engine.run(program, niters);


  /* Run analysis of the connected components  (output is written to a file) */
  if (output_labels){
    const std::string labels_path = filename + "-components";
    FILE * pfile = fopen(labels_path.c_str(), "w");
    if (!pfile){
      // Report the path that was actually opened, and never fall through to
      // fprintf() on a NULL stream should the fatal log not terminate.
      logstream(LOG_FATAL)<<"Failed to open file: " << labels_path << std::endl;
      return 1;
    }
    fprintf(pfile, "%%%%MatrixMarket matrix array real general\n");
    // Vertex 0 is not written: the loop below starts at 1, hence num_vertices()-1 rows.
    fprintf(pfile, "%lu %u\n", engine.num_vertices()-1, 1);
    for (uint i=1; i< engine.num_vertices(); i++){
      fprintf(pfile, "%u\n", vertex_values[i]);
      assert(vertex_values[i] >= 0 && vertex_values[i] < engine.num_vertices());
    }
    fclose(pfile); 
    logstream(LOG_INFO)<<"Saved succesfully to out file: " << labels_path << " time for saving: " << mytimer.current_time() << std::endl;
  } 

  std::cout<<"Total runtime: " << mytimer.current_time() << std::endl;
  if (p > 0)
    std::cout << "site fraction p= " << p << std::endl;
  if (n > 0){
    std::cout << "n=" << n*p << std::endl;
    std::cout << "isolated sites: " << p*(double)n-actual_vertices << std::endl;
  }
  std::cout << "Number of sites: " << actual_vertices << std::endl;
  std::cout << "Number of bonds: " << engine.num_edges() << std::endl;
  // Only meaningful when n was supplied; the default of -1 must not be used
  // as a denominator.
  if (n > 0){
    std::cout << "Percentage of sites: " << (double)actual_vertices / (double)n << std::endl;
    std::cout << "Percentage of bonds: " << (double)engine.num_edges() / (2.0*n) << std::endl;
  }
  std::cout  << "Number of iterations: " << iter << std::endl;
  std::cout << "SITES RESULT:\nsize\tcount\n";
  std::map<uint,uint> final_countsv;  // component size (in vertices) -> number of components
  std::map<uint,uint> final_countse;  // size taken from `state` -> number of entries with that size
  std::map<uint,uint> statv;          // label -> number of vertices carrying it
  for (int i=0; i< engine.num_vertices(); i++)
    statv[vertex_values[i]]++;


  uint total_sites = 0;
  for (std::map<uint, uint>::const_iterator it = statv.begin();
      it != statv.end(); ++it) {
    final_countsv[it->second] += 1;
    total_sites += it->second;
  }
  for (std::map<uint, uint>::const_iterator it = final_countsv.begin();
      it != final_countsv.end(); ++it) {
    std::cout << it->first << "\t" << it->second << "\n";
  }

  // One more pass with edge_count set; `state` is read right after it.
  // NOTE(review): presumably the program fills `state` with per-label edge
  // counts when edge_count is non-zero - confirm against the program class.
  edge_count = 1;
  engine.run(program, 1);
  std::cout << "BONDS RESULT:\nsize\tcount\n";
  uint total_bonds = 0;
  for (std::map<uint, uint>::const_iterator it = state.begin();
      it != state.end(); ++it) {
    final_countse[it->second] += 1;
    total_bonds += it->second;
  }
  for (std::map<uint, uint>::const_iterator it = final_countse.begin();
      it != final_countse.end(); ++it) {
    std::cout << it->first << "\t" << it->second << "\n";
  }
  // NOTE(review): `graph` is not declared in this function; confirm it is a
  // valid global (or whether `engine` was intended).
  assert(total_sites == graph.num_vertices());
  assert(total_bonds == graph.num_edges());

  return 0;
}
Example #18
0
/* example file format:
 * 2884424247 11 1210682095 1789803763 1879013170 1910216645 2014570227
 * 2109318072 2268277425 2289674265 2340794623 2513611825 2770280793
 * 2884596247 31 1191220232 1191258123 1225281292 1240067740
 * 2885009247 16 1420862042 1641392180 1642909335 1775498871 1781379945
 * 1784537661 1846581167 1934183965 2011304655 2016713117 2017390697
 * 2128869911 2132021133 2645747085 2684129850 2866009832
 */ 
void parse(int i){    
  in_file fin(in_files[i]);
  out_file fout((outdir + in_files[i] + ".out"));

  size_t linesize = 0;
  char * saveptr = NULL, * linebuf = NULL;
  char linebuf_debug[1024];
  size_t line = 1;
  uint from,to;
  bool matrix_market = false;

  while(true){
    int rc = getline(&linebuf, &linesize, fin.outf);
    strncpy(linebuf_debug, linebuf, 1024);
    if (rc < 1)
      break;
    if (strlen(linebuf) <= 1) //skip empty lines
      continue;
   //skipping over matrix market header (if any) 
    if (!strncmp(linebuf, "%%MatrixMarket", 14)){
      matrix_market = true;
      continue;
    }
    if (matrix_market && linebuf[0] == '%'){
      continue;
    }
    if (matrix_market && linebuf[0] != '%'){
      matrix_market = false;
      continue;
    }

    //read [FROM]
    char *pch = strtok_r(linebuf,string_to_tokenize, &saveptr);
    if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf_debug << "]" << std::endl; return; }
    assign_id(string2nodeid,nodeid2hash, from, pch, true);

    //read [NUMBER OF EDGES]
    pch = strtok_r(NULL,string_to_tokenize, &saveptr);
    if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf_debug << "]" << std::endl; return; }
    int num_edges = atoi(pch);
    if (num_edges < 0)
    { logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf_debug << "] - number of edges < 0" << std::endl; return;   }
    
    for (int k=0; k< num_edges; k++){
      pch = strtok_r(NULL, "\n\t\r, ", &saveptr);
      if (!pch){ logstream(LOG_ERROR) << "Error when parsing file: " << in_files[i] << ":" << line << "[" << linebuf_debug << "]" << std::endl; return; }
 
    assign_id(single_domain ? string2nodeid:string2nodeid2,
        single_domain ? nodeid2hash : nodeid2hash2, to, pch, single_domain ? true : false);
   if (tsv)
      fprintf(fout.outf, "%u\t%u\n", from, to);
    else if (csv)
      fprintf(fout.outf, "%u,%un", from, to);
    else 
      fprintf(fout.outf, "%u %u\n", from, to);
    nnz++;
  }

      line++;
    total_lines++;
    if (lines && line>=lines)
      break;

    if (debug && (line % 50000 == 0))
      logstream(LOG_INFO) << "Parsed line: " << line << " map size is: " << string2nodeid.size() << std::endl;
    if (string2nodeid.size() % 500000 == 0)
      logstream(LOG_INFO) << "Hash map size: " << string2nodeid.size() << " at time: " << mytime.current_time() << " edges: " << total_lines << std::endl;
  } 

  logstream(LOG_INFO) <<"Finished parsing total of " << line << " lines in file " << in_files[i] << endl <<
    "total map size: " << string2nodeid.size() << endl;

}
Example #19
0
  /**
   *  Vertex update function.
   *
   *  Even iterations: a pivot item loads its edges into memory through
   *  adjcontainer; a user vertex that has an edge to some pivot marks every
   *  vertex it is connected to in relevant_items.
   *  Odd iterations: a marked vertex is compared against the marked pivots,
   *  the non-zero results are sorted with the Greater comparator and the
   *  first K of them are written to the calling thread's output file.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &gcontext) {
    if (debug)
      printf("Entered iteration %d with %d\n", gcontext.iteration, v.id());

    const bool even_round = (gcontext.iteration % 2 == 0);

    if (even_round) {
      /* --- even round: load pivots, mark relevant items --- */

      // pivot items only need their adjacency list cached
      if (adjcontainer->is_pivot(v.id()) && is_item(v.id())){
        adjcontainer->load_edges_into_memory(v);
        if (debug)
          printf("Loading pivot %dintro memory\n", v.id());
        return;
      }

      // everything below concerns user vertices only
      if (!is_user(v.id()))
        return;

      // first round: the AA/RA/PROB metrics need the node degree stored
      if (gcontext.iteration == 0){
        const bool degree_based = (distance_metric == AA || distance_metric == RA || distance_metric == PROB);
        if (degree_based)
          latent_factors_inmem[v.id()].degree = v.num_edges();
      }

      // look for the first edge that leads to a pivot
      int linked_pivot = -1;
      bool found = false;
      for (int idx = 0; !found && idx < v.num_edges(); idx++){
        graphchi_edge<uint32_t> * edge = v.edge(idx);
        if (adjcontainer->is_pivot(edge->vertexid)){
          linked_pivot = edge->vertexid;
          found = true;
        }
      }
      if (debug)
        printf("user %d is linked to pivot %d\n", v.id(), linked_pivot);

      // a user without any pivot neighbour contributes nothing in this round
      if (!found)
        return;

      // the user touches a pivot, so all of its neighbours must be compared later
      for (int idx = 0; idx < v.num_edges(); idx++){
        graphchi_edge<uint32_t> * edge = v.edge(idx);
        relevant_items[edge->vertexid - M] = true;
      }
      return;
    }

    /* --- odd round: compare this vertex against the loaded pivots --- */

    if (!relevant_items[v.id() - M]){
      if (debug)
        logstream(LOG_DEBUG)<<"Skipping item: " << v.id() << " since not relevant" << std::endl;
      return;
    }

    std::vector<index_val> candidates;

    for (vid_t cand = adjcontainer->pivot_st; cand < adjcontainer->pivot_en; cand++){
      if (distance_metric != ASYM_COSINE){
        // other metrics: handle each unordered pair once, i.e. only pivots
        // with a smaller id than this vertex
        if (cand >= v.id() || !relevant_items[cand - M])
          continue;
      }
      else {
        // ASYM_COSINE: every marked pivot except the vertex itself
        if (!relevant_items[cand - M] || cand == v.id())
          continue;
      }

      double dist = adjcontainer->calc_distance(v, cand, distance_metric);
      item_pairs_compared++;
      if (item_pairs_compared % 10000000 == 0)
        logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " <<  std::setw(10) <<sum(written_pairs) << " written. " << std::endl;

      if (debug)
        printf("comparing %d to pivot %d distance is %g\n", cand - M + 1, v.id() - M + 1, dist);

      if (dist == 0){
        zero_dist++;
        continue;
      }
      candidates.push_back(index_val(cand, dist)); 
    }

    sort(candidates.begin(), candidates.end(), &Greater);
    int thread_num = omp_get_thread_num();
    if (candidates.size() < K)
      not_enough++;

    // emit at most K entries, in sorted order
    const size_t emit_count = std::min(candidates.size(), (size_t)K);
    for (size_t pos = 0; pos < emit_count; pos++){
      //write item similarity to file
      int rc = fprintf(out_files[thread_num], "%u %u %.12lg\n", v.id()-M+1, candidates[pos].index-M+1, (double)candidates[pos].val);
      written_pairs[thread_num]++;
      if (rc <= 0){
        perror("Failed to write output");
        logstream(LOG_FATAL)<<"Failed to write output to: file: " << training << thread_num << ".out" << std::endl;  
      }
    }
  }//end of update function
Example #20
0
  /**
   *  Vertex update function.
   *
   *  Iteration 0: item vertices add each edge value divided by N to
   *  mean[<neighbour id>].
   *  Iteration 1: item vertices store sum((edges - mean)^2) / (M - 1) in stddev.
   *  Later even iterations: pivot items load their edges into memory; a user
   *  with an edge to a pivot that is already marked in relevant_items marks
   *  all of its neighbours.
   *  Later odd iterations: a marked vertex is compared with every marked pivot
   *  of smaller id and each non-zero result is written to the calling thread's
   *  output file as: [item A] [item B] [distance]
   */
  void update(CE_Graph_vertex<VertexDataType, EdgeDataType> &v, CE_Graph_context &gcontext) {
    if (debug)
      printf("Entered iteration %d with %d\n", gcontext.iteration, v.id());

    /* --- iteration 0: accumulate the mean --- */
    if (gcontext.iteration == 0){
      if (!is_item(v.id()))
        return;
      for (int idx = 0; idx < v.num_edges(); idx++){
        CE_Graph_edge<float> * edge = v.edge(idx);
        vid_t rater = edge->vertexid;
        mean[rater] += edge->get_data() / (float)N;
      }
      return;
    }

    /* --- iteration 1: deviation of each item from the mean --- */
    if (gcontext.iteration == 1){
      if (!is_item(v.id()))
        return;
      dense_adj item_edges; 
      for (int idx = 0; idx < v.num_edges(); idx++){
        CE_Graph_edge<float> * edge = v.edge(idx);
        set_new(item_edges.edges, edge->vertexid, edge->get_data());
      }
      stddev[v.id() - M] = sum(minus(item_edges.edges, mean).array().pow(2)) / (M-1.0);
      if (debug)
        std::cout<<"item: " << v.id() - M+1 << " stddev: " << stddev[v.id() - M] << std::endl;
      return;
    }

    /* --- even iterations: load pivots, mark relevant items --- */
    if (gcontext.iteration % 2 == 0){
      // pivot items only need their adjacency list cached
      if (adjcontainer->is_pivot(v.id()) && is_item(v.id())){
        adjcontainer->load_edges_into_memory(v);         
        if (debug)
          printf("Loading pivot %d intro memory\n", v.id());
        return;
      }

      if (!is_user(v.id()))
        return;

      // look for the first edge leading to a pivot that is already marked relevant
      int linked_pivot = -1;
      bool found = false;
      for (int idx = 0; !found && idx < v.num_edges(); idx++){
        CE_Graph_edge<float> * edge = v.edge(idx);
        if (adjcontainer->is_pivot(edge->vertexid) && relevant_items[edge->vertexid-M]){
          linked_pivot = edge->vertexid;
          found = true;
        }
      }
      if (debug)
        printf("user %d is linked to pivot %d\n", v.id(), linked_pivot);

      // no such pivot: this user is not relevant at this point
      if (!found)
        return; 

      // otherwise every neighbour of this user has to be compared later on
      for (int idx = 0; idx < v.num_edges(); idx++){
        CE_Graph_edge<float> * edge = v.edge(idx);
        relevant_items[edge->vertexid - M] = true;
      }
      return;
    }

    /* --- odd iterations: compare against the loaded pivots --- */
    if (!relevant_items[v.id() - M])
      return;

    for (vid_t cand = adjcontainer->pivot_st; cand < adjcontainer->pivot_en; cand++){
      // the metric is symmetric, so only marked pivots with a smaller id are compared
      const bool comparable = (cand < v.id()) && relevant_items[cand - M];
      if (!comparable)
        continue;

      double dist = adjcontainer->calc_distance(v, cand, distance_metric);
      item_pairs_compared++;
      if (item_pairs_compared % 1000000 == 0)
        logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl;
      if (debug)
        printf("comparing %d to pivot %d distance is %lg\n", cand - M + 1, v.id() - M + 1, dist);

      if (dist == 0)
        continue;

      //write item similarity to file
      fprintf(out_files[omp_get_thread_num()], "%u %u %.12lg\n", v.id()-M+1, cand-M+1, (double)dist);
      written_pairs++;
    }
  }//end of update function