Пример #1
0
static int
iterate (void *vstate, gsl_multifit_function_fdf * fdf, gsl_vector * x, gsl_vector * f, gsl_matrix * J, gsl_vector * dx, int scale)
{
  lmder_state_t *state = (lmder_state_t *) vstate;

  gsl_matrix *r = state->r;
  gsl_vector *tau = state->tau;
  gsl_vector *diag = state->diag;
  gsl_vector *qtf = state->qtf;
  gsl_vector *x_trial = state->x_trial;
  gsl_vector *f_trial = state->f_trial;
  gsl_vector *rptdx = state->rptdx;
  gsl_vector *newton = state->newton;
  gsl_vector *gradient = state->gradient;
  gsl_vector *sdiag = state->sdiag;
  gsl_vector *w = state->w;
  gsl_vector *work1 = state->work1;
  gsl_permutation *perm = state->perm;

  double prered, actred;
  double pnorm, fnorm1, fnorm1p, gnorm;
  double ratio;
  double dirder;

  int iter = 0;

  double p1 = 0.1, p25 = 0.25, p5 = 0.5, p75 = 0.75, p0001 = 0.0001;

  if (state->fnorm == 0.0) 
    {
      return GSL_SUCCESS;
    }

  /* Compute qtf = Q^T f */

  gsl_vector_memcpy (qtf, f);
  gsl_linalg_QR_QTvec (r, tau, qtf);

  /* Compute norm of scaled gradient */

  compute_gradient_direction (r, perm, qtf, diag, gradient);

  { 
    size_t iamax = gsl_blas_idamax (gradient);

    gnorm = fabs(gsl_vector_get (gradient, iamax) / state->fnorm);
  }

  /* Determine the Levenberg-Marquardt parameter */

lm_iteration:
  
  iter++ ;

  {
    int status = lmpar (r, perm, qtf, diag, state->delta, &(state->par), newton, gradient, sdiag, dx, w);
    if (status)
      return status;
  }

  /* Take a trial step */

  gsl_vector_scale (dx, -1.0); /* reverse the step to go downhill */

  compute_trial_step (x, dx, state->x_trial);

  pnorm = scaled_enorm (diag, dx);

  if (state->iter == 1)
    {
      if (pnorm < state->delta)
        {
#ifdef DEBUG
          printf("set delta = pnorm = %g\n" , pnorm);
#endif
          state->delta = pnorm;
        }
    }

  /* Evaluate function at x + p */
  /* return immediately if evaluation raised error */
  {
    int status = GSL_MULTIFIT_FN_EVAL_F (fdf, x_trial, f_trial);
    if (status)
      return status;
  }

  fnorm1 = enorm (f_trial);

  /* Compute the scaled actual reduction */

  actred = compute_actual_reduction (state->fnorm, fnorm1);

#ifdef DEBUG
  printf("lmiterate: fnorm = %g fnorm1 = %g  actred = %g\n", state->fnorm, fnorm1, actred);
  printf("r = "); gsl_matrix_fprintf(stdout, r, "%g");
  printf("perm = "); gsl_permutation_fprintf(stdout, perm, "%d");
  printf("dx = "); gsl_vector_fprintf(stdout, dx, "%g");
#endif

  /* Compute rptdx = R P^T dx, noting that |J dx| = |R P^T dx| */

  compute_rptdx (r, perm, dx, rptdx);

#ifdef DEBUG
  printf("rptdx = "); gsl_vector_fprintf(stdout, rptdx, "%g");
#endif

  fnorm1p = enorm (rptdx);

  /* Compute the scaled predicted reduction = |J dx|^2 + 2 par |D dx|^2 */

  { 
    double t1 = fnorm1p / state->fnorm;
    double t2 = (sqrt(state->par) * pnorm) / state->fnorm;
    
    prered = t1 * t1 + t2 * t2 / p5;
    dirder = -(t1 * t1 + t2 * t2);
  }

  /* compute the ratio of the actual to predicted reduction */

  if (prered > 0)
    {
      ratio = actred / prered;
    }
  else
    {
      ratio = 0;
    }

#ifdef DEBUG
  printf("lmiterate: prered = %g dirder = %g ratio = %g\n", prered, dirder,ratio);
#endif


  /* update the step bound */

  if (ratio > p25)
    {
#ifdef DEBUG
      printf("ratio > p25\n");
#endif
      if (state->par == 0 || ratio >= p75)
        {
          state->delta = pnorm / p5;
          state->par *= p5;
#ifdef DEBUG
          printf("updated step bounds: delta = %g, par = %g\n", state->delta, state->par);
#endif
        }
    }
  else
    {
      double temp = (actred >= 0) ? p5 : p5*dirder / (dirder + p5 * actred);

#ifdef DEBUG
      printf("ratio < p25\n");
#endif

      if (p1 * fnorm1 >= state->fnorm || temp < p1 ) 
        {
          temp = p1;
        }

      state->delta = temp * GSL_MIN_DBL (state->delta, pnorm/p1);

      state->par /= temp;
#ifdef DEBUG
      printf("updated step bounds: delta = %g, par = %g\n", state->delta, state->par);
#endif
    }


  /* test for successful iteration, termination and stringent tolerances */

  if (ratio >= p0001)
    {
      gsl_vector_memcpy (x, x_trial);
      gsl_vector_memcpy (f, f_trial);

      /* return immediately if evaluation raised error */
      {
        int status;
        
        if (fdf->df)
          status = GSL_MULTIFIT_FN_EVAL_DF (fdf, x_trial, J);
        else
          status = gsl_multifit_fdfsolver_dif_df(x_trial, fdf, f_trial, J);

        if (status)
          return status;
      }

      /* wa2_j  = diag_j * x_j */
      state->xnorm = scaled_enorm(diag, x);
      state->fnorm = fnorm1;
      state->iter++;

      /* Rescale if necessary */

      if (scale)
        {
          update_diag (J, diag);
        }

      {
        int signum;
        gsl_matrix_memcpy (r, J);
        gsl_linalg_QRPT_decomp (r, tau, perm, &signum, work1);
      }
      
      return GSL_SUCCESS;
    }
  else if (fabs(actred) <= GSL_DBL_EPSILON  && prered <= GSL_DBL_EPSILON 
           && p5 * ratio <= 1.0)
    {
      return GSL_ETOLF ;
    }
  else if (state->delta <= GSL_DBL_EPSILON * state->xnorm)
    {
      return GSL_ETOLX;
    }
  else if (gnorm <= GSL_DBL_EPSILON)
    {
      return GSL_ETOLG;
    }
  else if (iter < 10)
    {
      /* Repeat inner loop if unsuccessful */
      goto lm_iteration;
    }

  return GSL_ENOPROG;
}
Пример #2
0
static VALUE rb_gsl_blas_idamax(int argc, VALUE *argv, VALUE obj)
{
  gsl_vector *x = NULL;
  get_vector1(argc, argv, obj, &x);
  return INT2FIX(gsl_blas_idamax(x));
}
Пример #3
0
 /**
  * C++ version of gsl_blas_idamax().
  * @param X A vector
  * @return Index of largest-magnitude element
  */
 CBLAS_INDEX_t idamax( vector const& X ){ return gsl_blas_idamax( X.get() ); }
Пример #4
0
int main(int argc, char **argv) {

  const int MAX_ITER  = 20;
  const double TOL = 1e-12;
  
  int rank;
  int size;
  int P = 8; // number of blocks to update P <= size

  /* -----------------------------------
     mode controls the selection schemes, 
       mode =0, fixed P
       mode =1, dynamic update P
     ----------------------------------*/
  int mode=1; // number of processors used to update each time
  double lambda = 0.1;
  srand (time(NULL));
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Determine current running process
  MPI_Comm_size(MPI_COMM_WORLD, &size); // Total number of processes
  
  // data directory (you need to change the path to your own data directory)
  char* dataCenterDir = "../Data/Gaussian";
  char* big_dir;
  if(argc==2)
    big_dir = argv[1];
  else
    big_dir = "big1";

  /* Read in local data */
  
  FILE *f, *test;
  int m, n, j;
  int row, col;
  double entry, startTime, endTime;
  double total_start_time, total_end_time;
  /*
   * Subsystem n will look for files called An.dat and bn.dat
   * in the current directory; these are its local data and do not need to be
   * visible to any other processes. Note that
   * m and n here refer to the dimensions of the *local* coefficient matrix.
   */
  
  /* ------------
     Read in A 
     ------------*/
  if(rank ==0){
    printf("=============================\n");
    printf("|    Start to load data!     |\n");
    printf("=============================\n");
  }
  char s[100];
  sprintf(s, "%s/%s/A%d.dat",dataCenterDir,big_dir, rank + 1);
  printf("[%d] reading %s\n", rank, s);
  f = fopen(s, "r");
  if (f == NULL) {
    printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s);
    exit(EXIT_FAILURE);
  }
  mm_read_mtx_array_size(f, &m, &n);
  gsl_matrix *A = gsl_matrix_calloc(m, n);
  for (int i = 0; i < m*n; i++) {
    row = i % m;
    col = floor(i/m);
    fscanf(f, "%lf", &entry);
    gsl_matrix_set(A, row, col, entry);
  }
  fclose(f);
  
  /* ------------
      Read in b 
     -------------*/
  sprintf(s, "%s/%s/b.dat", dataCenterDir, big_dir);
  printf("[%d] reading %s\n", rank, s);
  f = fopen(s, "r");
  if (f == NULL) {
    printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s);
    exit(EXIT_FAILURE);
  }
  mm_read_mtx_array_size(f, &m, &n);
  gsl_vector *b = gsl_vector_calloc(m);
  for (int i = 0; i < m; i++) {
    fscanf(f, "%lf", &entry);
    gsl_vector_set(b, i, entry);
  }
  fclose(f);
  
  /* ------------
     Read in xs 
     ------------*/
  sprintf(s, "%s/%s/xs%d.dat", dataCenterDir, big_dir, rank + 1);
  printf("[%d] reading %s\n", rank, s);
  f = fopen(s, "r");
  if (f == NULL) {
    printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s);
    exit(EXIT_FAILURE);
  }
  mm_read_mtx_array_size(f, &m, &n);
  gsl_vector *xs = gsl_vector_calloc(m);
  
  for (int i = 0; i < m; i++) {
    fscanf(f, "%lf", &entry);
    gsl_vector_set(xs, i, entry);
  }
  fclose(f);
  
  m = A->size1;
  n = A->size2;
  MPI_Barrier(MPI_COMM_WORLD);
  
  /*----------------------------------------
   * These are all variables related to GRock
   ----------------------------------------*/
  
  struct value table[size];
  gsl_vector *x        = gsl_vector_calloc(n);
  gsl_vector *As       = gsl_vector_calloc(n);
  gsl_vector *invAs    = gsl_vector_calloc(n);
  gsl_vector *local_b  = gsl_vector_calloc(m);
  gsl_vector *beta     = gsl_vector_calloc(n);
  gsl_vector *tmp      = gsl_vector_calloc(n);
  gsl_vector *d        = gsl_vector_calloc(n);
  gsl_vector *absd     = gsl_vector_calloc(n);
  gsl_vector *oldx     = gsl_vector_calloc(n);
  gsl_vector *tmpx     = gsl_vector_calloc(n);
  gsl_vector *z        = gsl_vector_calloc(m);
  gsl_vector *tmpz     = gsl_vector_calloc(m);
  gsl_vector *Ax       = gsl_vector_calloc(m);
  gsl_vector *Atmpx    = gsl_vector_calloc(m);
  gsl_vector *xdiff    = gsl_vector_calloc(n);
  gsl_permutation *idx = gsl_permutation_calloc(n);
  double send[1]; 
  double recv[1]; 
  double err;

  int num_upd = (int)(n*0.08);
  double sigma = 0.01;

  double xs_local_nrm[1], xs_nrm[1];
  double local_old_obj, global_old_obj, local_new_obj, global_new_obj;
  //calculate the 2 norm of xs
  xs_local_nrm[0] = gsl_blas_dnrm2(xs);
  xs_local_nrm[0] *=xs_local_nrm[0];
  MPI_Allreduce(xs_local_nrm, xs_nrm, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  xs_nrm[0] = sqrt(xs_nrm[0]);
  
  // evaluate the two norm of the columns of A
  for(j=0;j<n;j++){
    gsl_vector_view column = gsl_matrix_column(A, j);
    double d;
    d = gsl_blas_dnrm2(&column.vector);
    gsl_vector_set(As, j, d*d);
    gsl_vector_set(invAs, j, 1./(d*d));
  }
  
  if (rank == 0) {
    printf("=============================\n");
    printf("|GRock start to solve Lasso!|\n");
    printf("|---------------------------|\n");
    printf("|lambda=%1.2f, m=%d, n=%d  |\n", lambda, m, n*size);
    if(mode==1) printf("| Mode: dynamic update P.   |\n");
    else  printf("|   Mode: fixed update P    |\n");
    printf("=============================\n");
    printf("%3s %8s %8s %5s\n", "iter", "rel_err", "obj", "P");
    startTime = MPI_Wtime();
    sprintf(s, "results/test%d.m", size);
    test = fopen(s, "w");
    fprintf(test,"res = [ \n");
  }
  
  /* Main BCD loop */
  total_start_time = MPI_Wtime();
  int iter = 0;
  while (iter < MAX_ITER) {
    startTime = MPI_Wtime();

    /*---------- restore the old x ------------*/
    gsl_vector_memcpy(oldx, x);
    
    /*------- calculate local_b = b - sum_{j \neq i} Aj*xj--------- */ 
    gsl_blas_dgemv(CblasNoTrans, 1, A, x, 0, Ax); // Ax = A * x
    MPI_Allreduce(Ax->data, z->data,  m, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    gsl_vector_sub(z, b); // z = Ax - b
    gsl_vector_memcpy(local_b, Ax);
    gsl_vector_sub(local_b, z);
    
    /* -------calculate beta ------------------*/
    gsl_blas_dgemv(CblasTrans, -1, A, z, 0, beta); // beta = A'(b - Ax) + ||A.s||^2 * xs
    gsl_vector_memcpy(tmp, As);    
    pointwise(tmp, x, n);
    gsl_vector_add(beta, tmp);
    shrink(beta, lambda);
    // x = 1/|xs|^2 * shrink(beta, lambda)
    gsl_vector_memcpy(x, beta);
    pointwise(x, invAs, n); 
  
    /* ------calcuate proposed decrease -------- */
    gsl_vector_memcpy(d,x);
    gsl_vector_sub(d, oldx);
    if(mode ==1){
      gsl_vector_memcpy(absd, d);
      abs_vector(absd, n);
      // sort the local array d
      gsl_vector_scale(absd, -1.0);
      gsl_sort_vector_index(idx, absd);

      //    printf("|d(0)| = %lf, |d(1)| = %lf \n", gsl_vector_get(absd,0), gsl_vector_get(absd, 3));
      // calculate current objective value;
      local_old_obj = objective(oldx, lambda, z, size);
      MPI_Allreduce(&local_old_obj, &global_old_obj, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      num_upd = fmin(num_upd+1, (int)(0.1*n));    
      gsl_vector_memcpy(tmpx, oldx);
      int upd_idx;
      double local_delta = 0, delta=0.0;
      for(int i=0; i<num_upd; i++){
	upd_idx = gsl_permutation_get(idx, i);
	//      printf("%d\n", upd_idx);
	gsl_vector_set(tmpx, upd_idx, gsl_vector_get(x, upd_idx));
	local_delta += gsl_vector_get(d, upd_idx) * gsl_vector_get(d, upd_idx);
      }
      MPI_Allreduce(&local_delta, &delta,  1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);    
      gsl_blas_dgemv(CblasNoTrans, 1, A, tmpx, 0, Atmpx); // Ax = A * x
      MPI_Allreduce(Atmpx->data, tmpz->data,  m, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      gsl_vector_sub(tmpz, b); // z = Ax - b
    
      local_new_obj = objective(tmpx, lambda, tmpz, size);
      MPI_Allreduce(&local_new_obj, &global_new_obj, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

      while(global_new_obj - global_old_obj> -sigma * delta){
	num_upd = fmax(num_upd-1, 1);
	for(int i=0; i<num_upd; i++){
	  upd_idx = gsl_permutation_get(idx, i);
	  gsl_vector_set(tmpx, upd_idx, gsl_vector_get(x, upd_idx));
	  local_delta += gsl_vector_get(d, upd_idx) * gsl_vector_get(d, upd_idx);
	}
	MPI_Allreduce(&delta, &local_delta,  1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);    
	gsl_blas_dgemv(CblasNoTrans, 1, A, tmpx, 0, Atmpx); // Ax = A * x
	MPI_Allreduce(Atmpx->data, tmpz->data,  m, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	gsl_vector_sub(tmpz, b); // z = Ax - b
	
	local_new_obj = objective(tmpx, lambda, tmpz, size);
	MPI_Allreduce(&local_new_obj, &global_new_obj, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	
	if(num_upd==1)
	  break;
      }

      gsl_vector_memcpy(x, tmpx);
    }  

    if(mode==0){
      CBLAS_INDEX_t id = gsl_blas_idamax(d);
      double *store = (double*)calloc(size, sizeof(double));
      double foo[1];
      foo[0] = gsl_vector_get(d,id);
      MPI_Allgather(foo, 1, MPI_DOUBLE, store, 1, MPI_DOUBLE, MPI_COMM_WORLD);
      for(int i=0;i<size;i++){
	table[i].ID   = i;
	table[i].data = fabs(store[i]);
      }
      // quick sort to decide which block to update
      qsort((void *) & table, size, sizeof(struct value), (compfn)compare );
      gsl_vector_memcpy(x, oldx);
      
      if(size>P){
	for(int i=0;i<P;i++){
	  if(rank == table[i].ID)
	    gsl_vector_set(x, id, gsl_vector_get(oldx, id) + gsl_vector_get(d, id));
	}
      }else
	gsl_vector_set(x, id, gsl_vector_get(oldx, id) + gsl_vector_get(d, id));
      local_new_obj = objective(x, lambda, z, size);
      MPI_Allreduce(&local_new_obj, &global_new_obj, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    }
    
    /*------------------------------
      calculate the relative error
      ------------------------------*/
    gsl_vector_memcpy(xdiff,xs);
    gsl_vector_sub(xdiff, x);
    err = gsl_blas_dnrm2(xdiff);
    send[0] = err*err;
    MPI_Allreduce(send, recv, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    recv[0] = sqrt(recv[0])/xs_nrm[0];
 
    endTime = MPI_Wtime();
    if(mode==1) P = num_upd*size;
    if (rank == 0) {
      if(iter%5 == 0)
	printf("%3d %10.2e %10.4f %3d\n", iter,
	       recv[0],  global_new_obj, P);
      fprintf(test, "%e \n",recv[0]);
    }

    /* termination check */
    if(recv[0] < TOL){
      break;
    }
    iter++;
  }
  total_end_time = MPI_Wtime();  
  /* Have the master write out the results to disk */
  if (rank == 0) {
    printf("=============================\n");
    printf("|    GRock solved Lasso!    |\n");
    printf("|---------------------------|\n");
    printf("|Summary:                   |\n");
    printf("|   # of iteration: %d      |\n", iter);
    printf("|   relative error: %4.2e|\n", recv[0]);
    printf("|  objective value: %4.2f    |\n", global_new_obj);
    printf("|             time: %4.1es|\n", total_end_time - total_start_time);
    printf("=============================\n");
    
    fprintf(test,"] \n");
    fprintf(test,"semilogy(1:length(res),res); \n");
    fprintf(test,"xlabel('# of iteration'); ylabel('||x - xs||');\n");
    fclose(test);
    f = fopen("results/solution.dat", "w");
    fprintf(f,"x = [ \n");
    gsl_vector_fprintf(f, x, "%lf");
    fprintf(f,"] \n");
    fclose(f);
    endTime = MPI_Wtime();
  }
  
  MPI_Finalize(); /* Shut down the MPI execution environment */
  
  /* Clear memory */
  gsl_matrix_free(A);
  gsl_vector_free(b);
  gsl_vector_free(x);
  gsl_vector_free(z);
  gsl_vector_free(xdiff);
  gsl_vector_free(Ax);
  gsl_vector_free(As);
  gsl_vector_free(invAs);
  gsl_vector_free(tmpx);
  gsl_vector_free(oldx);
  gsl_vector_free(local_b);
  gsl_vector_free(beta);
  gsl_vector_free(tmpz);
  gsl_vector_free(absd);
  gsl_vector_free(Atmpx);
  gsl_permutation_free(idx);

  return 0;
}