/**
  * Called after an iteration has finished.
  */
 void after_iteration(int iteration, graphchi_context &gcontext) {
   /*
    * If there were changes in the current iteration, then iterate once more.
    * If there were no changes, stop execution by setting current iteration to last iteration. 
    */
     if(gcontext.scheduler->num_tasks() > 0)
         gcontext.set_last_iteration(iteration+1);
     else
         gcontext.set_last_iteration(iteration);
    
 }
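
/* Sketch (not from the original program) of how an update function could drive
 * the convergence check above: neighbors are re-scheduled only when a vertex's
 * value actually changes, so scheduler->num_tasks() == 0 after an iteration
 * means nothing changed. VertexDataType, EdgeDataType and the min-label rule
 * are assumptions used only for illustration.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &gcontext) {
    VertexDataType old_label = v.get_data();
    VertexDataType new_label = old_label;
    for (int i = 0; i < v.num_inedges(); i++)
        new_label = std::min(new_label, v.inedge(i)->get_data());
    if (new_label != old_label) {
        v.set_data(new_label);
        for (int i = 0; i < v.num_outedges(); i++) {
            v.outedge(i)->set_data(new_label);
            // Re-schedule the neighbor so after_iteration() sees num_tasks() > 0.
            gcontext.scheduler->add_task(v.outedge(i)->vertex_id());
        }
    }
}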
Example #2
/**
  compute validation rmse
  */
void validation_rmse3(float (*prediction_func)(const vertex_data & user, const vertex_data & movie, const vertex_data & time, float rating, double & prediction),
    graphchi_context & gcontext, int tokens_per_row = 4) {
  int ret_code;
  MM_typecode matcode;
  FILE *f;
  size_t nz;   

  if ((f = fopen(validation.c_str(), "r")) == NULL) {
    std::cout<<std::endl;
    return; //missing validation data, nothing to compute
  }

  if (mm_read_banner(f, &matcode) != 0)
    logstream(LOG_FATAL) << "Could not process Matrix Market banner. File: " << validation << std::endl;

  if (mm_is_complex(matcode) || !mm_is_sparse(matcode))
    logstream(LOG_FATAL) << "Sorry, this application does not support complex values and requires a sparse matrix." << std::endl;

  /* find out size of sparse matrix .... */
  if ((ret_code = mm_read_mtx_crd_size(f, &Me, &Ne, &nz)) !=0) {
    logstream(LOG_FATAL) << "Failed reading matrix size: error=" << ret_code << std::endl;
  }
  if ((M > 0 && N > 0) && (Me != M || Ne != N))
    logstream(LOG_FATAL)<<"Input size of validation matrix must be identical to training matrix, namely " << M << "x" << N << std::endl;

  Le = nz;

  last_validation_rmse = dvalidation_rmse;
  dvalidation_rmse = 0;   
  int I, J;
  double val, time = 1.0;
 
  for (size_t i=0; i<nz; i++)
  {
   int rc;
    rc = fscanf(f, "%d %d %lg %lg\n", &I, &J, &time, &val);

    if (rc != tokens_per_row)
      logstream(LOG_FATAL)<<"Error when reading input file on line: " << i << " . should have" << tokens_per_row << std::endl;
    if (val < minval || val > maxval)
      logstream(LOG_FATAL)<<"Value is out of range: " << val << " should be: " << minval << " to " << maxval << std::endl;
    if ((uint)time > K)
      logstream(LOG_FATAL)<<"Third column value time should be smaller than " << K << " while observed " << time << " in line : " << i << std::endl;

    I--;  /* adjust from 1-based to 0-based */
    J--;
    double prediction;
    (*prediction_func)(latent_factors_inmem[I], latent_factors_inmem[J+M], latent_factors_inmem[M+N+(uint)time], val, prediction);
    dvalidation_rmse += pow(prediction - val, 2);
  }
  fclose(f);

  assert(Le > 0);
  dvalidation_rmse = sqrt(dvalidation_rmse / (double)Le);
  std::cout<<"  Validation RMSE: " << std::setw(10) << dvalidation_rmse << std::endl;
  if (halt_on_rmse_increase && dvalidation_rmse > last_validation_rmse && gcontext.iteration > 0){
       logstream(LOG_WARNING)<<"Stopping engine because of validation RMSE increase" << std::endl;
       gcontext.set_last_iteration(gcontext.iteration);
    }
}
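
/* Hedged usage sketch (not part of the original file): a prediction callback
 * matching the signature expected by validation_rmse3(), combining user, item
 * and time-bin factors with an element-wise product. The field name `pvec`
 * and the clipping to [minval, maxval] are assumptions for illustration.
 */
float time_svd_predict(const vertex_data & user, const vertex_data & movie,
                       const vertex_data & time_bin, float rating, double & prediction) {
    prediction = 0;
    for (int k = 0; k < (int)user.pvec.size(); k++)
        prediction += user.pvec[k] * movie.pvec[k] * time_bin.pvec[k];
    prediction = std::min(std::max(prediction, (double)minval), (double)maxval);
    double err = rating - prediction;
    return (float)(err * err);   // squared error term accumulated into the RMSE
}

/* It could then be passed as:
 *   validation_rmse3(&time_svd_predict, gcontext);   // tokens_per_row defaults to 4
 */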
    /**
     * Called before an execution interval is started.
     *
     * On every even iteration, we load the pivot items' connected user lists into memory.
     * Here we manage the memory budget to ensure that we do not load too many
     * edges into memory.
     */
    void before_exec_interval(vid_t window_st, vid_t window_en, graphchi_context &gcontext) {

        /* on even iterations, load pivot items into memory based on the memory budget allowed by membudget_mb */
        if ((gcontext.iteration % 2 == 0)) {
            printf("entering iteration: %d on before_exec_interval\n", gcontext.iteration);
            printf("pivot_st is %d window_st %d, window_en %d\n", adjcontainer->pivot_st, window_st, window_en);
            if (adjcontainer->pivot_st < std::min(std::min((int)M,end_user), (int)window_en)) {
                size_t max_grab_edges = get_option_long("membudget_mb", 1024) * 1024 * 1024 / 8;
                if (grabbed_edges < max_grab_edges * 0.8) {
                    Rcpp::Rcerr << "Window init, grabbed: " << grabbed_edges << " edges" << " extending pivor_range to : " << window_en + 1 << std::endl;
                    adjcontainer->extend_pivotrange(std::min(std::min((int)M, end_user), (int)window_en + 1));
                    Rcpp::Rcerr << "Window en is: " << window_en << " vertices: " << gcontext.nvertices << std::endl;
                    if (window_en+1 >= gcontext.nvertices) {
                        // every user was a pivot item, so we are done
                        Rcpp::Rcerr<<"Setting last iteration to: " << gcontext.iteration + 2 << std::endl;
                        gcontext.set_last_iteration(gcontext.iteration + 2);
                    }
                } else {
                    Rcpp::Rcerr << "Too many edges, already grabbed: " << grabbed_edges << std::endl;
                }
            }
        }

    }
 /**
  * Called after an iteration has finished.
  */
 void after_iteration(int iteration, graphchi_context &ginfo) {
   logstream(LOG_DEBUG)<<mytimer.current_time() << " iteration: " << iteration << " changes: " << changes << std::endl;
   if (changes == 0)
     ginfo.set_last_iteration(iteration);
   changes = 0;
   iter++;
 }
Example #5
  /**
   * Called before an execution interval is started.
   *
   * On every even iteration, we load the pivot items' connected user lists into memory.
   * Here we manage the memory budget to ensure that we do not load too many
   * edges into memory.
   */
  void before_exec_interval(vid_t window_st, vid_t window_en, graphchi_context &gcontext) {        

    /* on even iterations, load pivot items into memory based on the memory budget allowed by membudget_mb */
    if (gcontext.iteration % 2 == 0) {
      if (!quiet){
        printf("entering iteration: %d on before_exec_interval\n", gcontext.iteration);
        printf("pivot_st is %d window_en %d\n", adjcontainer->pivot_st, window_en);
      }
      if (adjcontainer->pivot_st <= window_en) {
        size_t max_grab_edges = get_option_long("membudget_mb", 1024) * 1024 * 1024 / 8;
        if (grabbed_edges < max_grab_edges * 0.8) {
          logstream(LOG_DEBUG) << "Window init, grabbed: " << grabbed_edges << " edges" << " extending pivor_range to : " << window_en + 1 << std::endl;
          adjcontainer->extend_pivotrange(window_en + 1);
          logstream(LOG_DEBUG) << "Window en is: " << window_en << " vertices: " << gcontext.nvertices << std::endl;
          if (window_en+1 == gcontext.nvertices) {
            // every item was a pivot item, so we are done
            logstream(LOG_DEBUG)<<"Setting last iteration to: " << gcontext.iteration + 2 << std::endl;
            gcontext.set_last_iteration(gcontext.iteration + 2);                    
          }
        } else {
          logstream(LOG_DEBUG) << "Too many edges, already grabbed: " << grabbed_edges << std::endl;
        }
      }
    }

  }
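
  /* Worked example for the memory budget above (informal): with the default
   * membudget_mb = 1024 the edge budget is 1024 * 1024 * 1024 / 8 = 134,217,728
   * edges, i.e. roughly 8 bytes are assumed per grabbed edge; pivot extension
   * stops once 80% of that budget (about 107 million edges) has been grabbed.
   */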
    void after_iteration(int iteration, graphchi_context &gcontext) {
        //first_iteration = false;
		if(converged){
			logstream(LOG_INFO)<<"scc_forward has finished!"<<std::endl;
			gcontext.set_last_iteration(iteration);
		}
    }
    /**
      * Pagerank update function.
      */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
        float sum=0;
        if (ginfo.iteration == 0) {
            /* On first iteration, initialize vertex and out-edges. 
               The initialization is important,
               because on every run, GraphChi will modify the data in the edges on disk. 
             */
	    update_edge_data(v, 1.0);
            v.set_data(RANDOMRESETPROB); 
        } else {
            /* Compute the sum of neighbors' weighted pageranks by
               reading from the in-edges. */
            for(int i=0; i < v.num_inedges(); i++) {
                struct weightE eData = v.inedge(i)->get_data();
                sum += eData.pagerank;
            }
            
            /* Compute my pagerank */
            float pagerank = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum;
            
            /* Write my pagerank divided by the number of out-edges to
               each of my out-edges. */
	    update_edge_data(v, pagerank);
                
            /* Keep track of the progression of the computation.
               GraphChi engine writes a file filename.deltalog. */
            ginfo.log_change(std::abs(pagerank - v.get_data()));
            
            /* Set my new pagerank as the vertex value */
            v.set_data(pagerank); 
        }
    }
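
    /* Sketch of the helper referenced above; the original update_edge_data is
     * not shown in this snippet. A plausible shape, assuming the weightE struct
     * carries a `pagerank` field: divide the vertex's pagerank evenly over its
     * out-edges and write one share per edge.
     */
    void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float value) {
        if (v.num_outedges() == 0)
            return;
        float share = value / v.num_outedges();
        for (int i = 0; i < v.num_outedges(); i++) {
            struct weightE eData = v.outedge(i)->get_data();
            eData.pagerank = share;           // each out-edge gets an equal share
            v.outedge(i)->set_data(eData);
        }
    }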
    /**
      * Called after an iteration has finished. Flushes the output list file and stops after the second iteration.
      */
    void after_iteration(int iteration, graphchi_context &ginfo) {
		if(iteration == 0){

		}else if(iteration == 1){
			fflush(fp_list);
			ginfo.set_last_iteration(iteration);	
		}
    }
	 void after_iteration(int iteration, graphchi_context &gcontext) {
	//	if(converged){
	//		logstream(LOG_INFO)<<"scc_backward has finished!"<<std::endl;
			fflush(fpout);
			//fflush(fpout1);
			gcontext.set_last_iteration(iteration);
	//	}
	}
	/**
	 * Called after an iteration has finished.
	 */
	void after_iteration(int iteration, graphchi_context &ginfo) {
		/*	
			if (converged) {
			std::cout << "Converged!" << std::endl;
			ginfo.set_last_iteration(iteration);
			}
			*/
		if(iteration == 1)	
			ginfo.set_last_iteration(iteration);
	}
Example #11
void run_validation4(graphchi_engine<VertexDataType, EdgeDataType> * pvalidation_engine, graphchi_context & context){
   //no validation data, no need to run validation engine calculations
   cur_iteration = context.iteration;
   if (pvalidation_engine == NULL){
     std::cout << std::endl;
     return;
   }
   ValidationRMSEProgram4 program;
   pvalidation_engine->run(program, 1);
   if (converged_engine)
     context.set_last_iteration(cur_iteration);
}
Example #12
 /**
  * Called after an iteration has finished.
  */
 void after_iteration(int iteration, graphchi_context &gcontext) {
     training_objective = sum(objective_vec);
     Rcpp::Rcout<<"  Training objective:" << std::setw(10) << training_objective << std::endl;
     if (halt_on_mrr_decrease > 0 && halt_on_mrr_decrease < cur_iteration && training_objective < last_training_objective) {
         Rcpp::Rcout << "Stopping engine because of validation objective decrease" << std::endl;
         gcontext.set_last_iteration(gcontext.iteration);
     }
     Rcpp::Rcerr << "after_iteration: running validation engine" << std::endl;
     run_validation(pvalidation_engine, gcontext);
     if (verbose)
         Rcpp::Rcout<<"Average step size: " << sum(stat_vec)/(double)M << "Node without edges: " << node_without_edges << std::endl;
     sgd_gamma *= sgd_step_dec;
 }
Example #13
 /**
  * Called after an iteration has finished.
  */
 void after_iteration(int iteration, graphchi_context &gcontext)
 {
   training_objective = sum(objective_vec);
   std::cout<<"  Training objective:" << std::setw(10) << training_objective << std::endl;
   if (halt_on_mrr_decrease > 0 && halt_on_mrr_decrease < cur_iteration && training_objective < last_training_objective)
   {
     logstream(LOG_WARNING) << "Stopping engine because of validation objective decrease" << std::endl;
     gcontext.set_last_iteration(gcontext.iteration);
   }
   logstream(LOG_DEBUG) << "after_iteration: running validation engine" << std::endl;
   run_validation(pvalidation_engine, gcontext);
   sgd_gamma *= sgd_step_dec;
 }
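
 /* Note on the last line above: sgd_gamma decays geometrically once per
  * iteration, so after t iterations the effective step size is
  * sgd_gamma_0 * sgd_step_dec^t; for example, with sgd_gamma = 0.01 and
  * sgd_step_dec = 0.9 the step size after 10 iterations is about 0.0035.
  */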
  /**
   * Called after an iteration has finished. Aggregates the model updates and
   * the evaluation measure.
   */
  void after_iteration(int iteration, graphchi_context &ginfo) {
    // TODO: to separate class?

    /* Add each execution thread's accumulated delta to the parent model. */
    for (int i = 0; i < ginfo.execthreads; i++) {
      parallel_models[i]->update_parent(num_queries);
    }

    if (phase == TRAINING || phase == VALIDATION || phase == TESTING) {
      eval->after_iteration(iteration, ginfo);

      std::cout << "AVG NDCG: " << eval->avg_eval << std::endl;
    }

    /** Stop if the evaluation results get worse. */
    if (stop == STOP_TRAINING) {
      if (phase == TRAINING) {
        if (eval->avg_eval < last_eval_value) {
          if (last_model.get() != NULL) {
            model = last_model.release();
          }
          ginfo.set_last_iteration(ginfo.iteration);
        } else {
          last_eval_value = eval->avg_eval;
          last_model.reset(model->clone());
        }
      }
    }
  }
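
  /* The rollback above relies on the model exposing a deep-copying clone().
   * A minimal sketch of that contract (the MlModel base name and the weights
   * field are assumptions, not the project's actual class hierarchy):
   */
  class LinearRegression : public MlModel {
  public:
    std::vector<double> weights;
    virtual MlModel* clone() const {
      return new LinearRegression(*this);   // deep copy, including the weight vector
    }
  };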
void run_validation(graphchi_engine<VertexDataType, EdgeDataType> * pvalidation_engine, graphchi_context & context){
  //no validation data, no need to run validation engine calculations
  cur_iteration = context.iteration;
  if (pvalidation_engine == NULL)
    return;
  if (calc_ap){ //AP
    ValidationAPProgram program;
    pvalidation_engine->run(program, 1);
  }
  else { //RMSE
    ValidationRMSEProgram program;
    pvalidation_engine->run(program, 1);
  }
  if (converged_engine)
    context.set_last_iteration(context.iteration);
}
    /**
      * Called after an iteration has finished. Flushes the METIS output file and stops after the second iteration.
      */
    void after_iteration(int iteration, graphchi_context &ginfo) {
		if(iteration == 0){
			/*
			vid_t count = 1;
			std::sort(degvector.begin(), degvector.end(), sortFunc);		
			for(int i=0; i<degvector.size(); i++){
				idmap[degvector[i].vid] = count++;	
				fprintf(fp_vt, "%d\t%d\t%d\n", i, degvector[i].vid, degvector[i].deg);
			}
			fflush(fp_vt);		
			*/
			//fp_edgelist = fopen();
		}else if(iteration == 1){
			fflush(fp_metis);
			ginfo.set_last_iteration(iteration);	
		}
    }
 /**
  * Called before an execution interval is started.
  *
  * On every even iteration, we load the pivot vertices' adjacency lists into memory.
  * Here we manage the memory budget to ensure that we do not load too many
  * edges into memory.
  */
 void before_exec_interval(vid_t window_st, vid_t window_en, graphchi_context &gcontext) {        
     if (gcontext.iteration % 2 == 0) {
         if (adjcontainer->pivot_st <= window_en) {
             size_t max_grab_edges = get_option_long("membudget_mb", 1024) * 1024 * 1024 / 8;
             if (grabbed_edges < max_grab_edges * 0.8) {
                 logstream(LOG_DEBUG) << "Window init, grabbed: " << grabbed_edges << " edges" << std::endl;
                 for(vid_t vid=window_st; vid <= window_en; vid++) {
                     gcontext.scheduler->add_task(vid);
                 }
                 adjcontainer->extend_pivotrange(window_en + 1);
                 if (window_en == gcontext.nvertices) {
                     // Last iteration needed for collecting last triangle counts
                     gcontext.set_last_iteration(gcontext.iteration + 3);                    
                 }
             } else {
                 std::cout << "Too many edges, already grabbed: " << grabbed_edges << std::endl;
             }
         }
     }
     
 }
    /**
      * Pagerank update function.
      */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
        float sum=0;
	float prv = 0.0;
	float pagerankcont = 0.0;	

        if (ginfo.iteration == 0) {
            /* On first iteration, initialize vertex and out-edges. 
               The initialization is important,
               because on every run, GraphChi will modify the data in the edges on disk. 
             */
	    /* For the weighted version */
	    update_edge_data(v, 1.0, true);
            v.set_data(RANDOMRESETPROB); 
            //v.set_data(1.0); 
        } else {
	    /* We need to come up with the weighted version */
            for(int i=0; i < v.num_inedges(); i++) {
                chivector<float> * evector = v.inedge(i)->get_vector();
                assert(evector->size() >= 2);
                sum += evector->get(1);
            }

            /* Compute my pagerank */
            prv = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum;

            update_edge_data(v, prv, false);

            /* Keep track of the progression of the computation.
               GraphChi engine writes a file filename.deltalog. */
            double delta = std::abs(prv - v.get_data());
            ginfo.log_change(delta);

            /* Set my new pagerank as the vertex value */
            v.set_data(prv);
        }
    }
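
/* Sketch (assumptions marked): the weighted variant above appears to keep a
 * two-slot chivector per edge, slot 0 holding the edge weight and slot 1 the
 * weighted pagerank contribution. One plausible shape of the helper it calls;
 * the weight normalisation and the slot layout are assumptions:
 */
void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float pagerank, bool first_iteration) {
    // Sum outgoing edge weights so contributions can be normalised.
    float weight_sum = 0;
    for (int i = 0; i < v.num_outedges(); i++) {
        chivector<float> * evector = v.outedge(i)->get_vector();
        if (first_iteration && evector->size() < 2) {
            evector->add(1.0f);   // slot 0: default edge weight
            evector->add(0.0f);   // slot 1: pagerank contribution, filled below
        }
        weight_sum += evector->get(0);
    }
    // Write each edge's weighted share of this vertex's pagerank into slot 1.
    if (weight_sum > 0) {
        for (int i = 0; i < v.num_outedges(); i++) {
            chivector<float> * evector = v.outedge(i)->get_vector();
            evector->set(1, pagerank * evector->get(0) / weight_sum);
        }
    }
}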
Example #19
	void after_iteration(int iteration, graphchi_context &gcontext) {
		if(iteration == 1){
			fflush(vmap);
			gcontext.set_last_iteration(iteration);
		}
	}
Example #20
	 void after_iteration(int iteration, graphchi_context &gcontext) {
			gcontext.set_last_iteration(iteration);
	}