Example #1
0
int mr(spinor * const P, spinor * const Q,
       const int max_iter, const double eps_sq,
       const int rel_prec, const int N, const int parallel, 
       matrix_mult f){
  int i=0;
  double norm_r,beta;
  _Complex double alpha;
  spinor * r;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;
  
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r = solver_field[0];
  
  zero_spinor_field(P, N);
  f(solver_field[2], P);
  diff(r, Q, solver_field[2], N);
  norm_r=square_norm(solver_field[0], N, parallel);
  if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
    printf("MR iteration number: %d, |res|^2 = %e\n", i, norm_r); 
    fflush( stdout );
  }
  while((norm_r > eps_sq) && (i < max_iter)){
    i++;
    f(solver_field[1], r);
    alpha=scalar_prod(solver_field[1], r, N, parallel);
    beta=square_norm(solver_field[1], N, parallel);
    alpha /= beta;
    assign_add_mul(P, r, alpha, N);
    if(i%50 == 0){
      f(solver_field[2], P);
    }
    else{
      assign_add_mul(solver_field[2], solver_field[1], alpha, N);
    }

    diff(r, Q, solver_field[2], N);
    norm_r=square_norm(solver_field[0], N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
      printf("# MR iteration= %d  |res|^2= %g\n", i, norm_r); 
      fflush(stdout);
    }
  }
  finalize_solver(solver_field, nr_sf);
  if(norm_r > eps_sq){
    return(-1);
  }
  return(i);
}
Example #2
0
/* Q=Q-sum_i lambda_i |eigen_i><eigen_i|P> */
void sub_lowest_eigenvalues(spinor * const Q, spinor * const P, const int n, const int N) {
  int i;
  complex c;
  
  for(i = 0; i < n; i++){
    c = scalar_prod(&(eigenvectors[i*evlength]), P, N, 1);
    c.re *= -eigenvls[i];
    c.im *= -eigenvls[i];
    assign_add_mul(Q, &eigenvectors[i*evlength], c, N);
  }
}
Example #3
0
/* Q = Q + sum_i 1/lambda_i |eigen_i><eigen_i|P> */
void assign_add_invert_subtracted_part(spinor * const Q, spinor * const P, const int n, const int N) {
  int i=0;
  complex c;
  double rev=0;

  for(i = 0; i < n; i++){
    c = scalar_prod(&eigenvectors[i*evlength], P, N, 1);
    rev = 1./eigenvls[i];
    c.re *= rev;
    c.im *= rev;
    assign_add_mul(Q, &eigenvectors[i*evlength], c, N);
  }
}
Example #4
0
void addproj_q_invsqrt(spinor * const Q, spinor * const P, const int n, const int N) {
  
  int j;
  spinor  *aux;
  complex cnorm, lambda;
  static double save_ev[2]={-1.,-1.};
  static int * ev_sign = NULL;
  
  if(eigenvls[0] != save_ev[0] && eigenvls[1] != save_ev[1] ) {
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("# Recomputing eigenvalue signs!\n");
      fflush(stdout);
    }
    for(j = 0; j < 2; j++) {
      save_ev[j] = eigenvls[j];
    }
    free(ev_sign);
    ev_sign = (int*) malloc(n * sizeof(int));

    aux=lock_Dov_WS_spinor(1);

    for(j=0; j < n; j++) {
      D_psi(aux, &(eigenvectors[j*evlength]));
      gamma5(aux, aux, N);
      
      lambda = scalar_prod(&(eigenvectors[j*evlength]), aux, N, 1);
      if (lambda.re < 0) {
	ev_sign[j] = -1;
      }
      else {
	ev_sign[j] = 1;
      }
    }

    unlock_Dov_WS_spinor(1);
/*     free(aux_); */
  }

  for(j = 0; j < n; j++) {
    cnorm = scalar_prod(&(eigenvectors[j*evlength]), P, N, 1);
    
    cnorm.re *= (double)ev_sign[j];
    cnorm.im *= (double)ev_sign[j];
    
    assign_add_mul(Q, &(eigenvectors[j*evlength]), cnorm, N);
  }
  return;
}
Example #5
0
void invert_eigenvalue_part(spinor * const Q, spinor * const P, const int n, const int N) {
  int i=0;
  complex c;
  double rev=0;

/*   c = scalar_prod(&eigenvectors[0], P, N, 1); */
/*   rev = 1./eigenvls[0]; */
/*   c.re *= rev; */
/*   c.im *= rev; */
/*   mul(Q, c, &eigenvectors[0], N); */
  assign(Q, P, N);
  for(i = 0; i < n; i++){
    c = scalar_prod(&eigenvectors[i*evlength], P, N, 1);
    c.re *= -inv_eigenvls[i];
    c.im *= -inv_eigenvls[i];
    assign_add_mul(Q, &eigenvectors[i*evlength], c, N);
  }
}
Example #6
0
int arpack_cg(
  /* solver params */
  const int N,                   /* (IN) Number of lattice sites for this process*/
  solver_params_t solver_params, /* (IN) parameters for solver */
  spinor * const x,              /* (IN/OUT) initial guess on input, solution on output for this RHS*/
  spinor * const b,              /* (IN) right-hand side*/
  matrix_mult f,                 /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */
  matrix_mult f32,               /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */
  const double eps_sq,           /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/
  const int rel_prec,            /* (IN) 0 for using absoute error for convergence
                                         1 for using relative error for convergence*/
  const int maxit,               /* (IN) Maximum allowed number of iterations to solution for the linear system*/
  matrix_mult f_final,           /* (IN) final operator application during projection of type 1 */
  matrix_mult f_initial          /* (IN) initial operator application during projection of type 1 */
) {

  /* Static variables and arrays. */
  static int ncurRHS=0;                  /* current number of the system being solved */                   
  static void *_ax,*_r,*_tmps1,*_tmps2;                  
  static spinor *ax,*r,*tmps1,*tmps2;                  
  static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1;
  static _Complex double *zheev_work;
  static double *hevals,*zheev_rwork;
  static int *IPIV; 
  static int info_arpack=0;
  static int nconv=0; /* number of converged eigenvectors as returned by arpack */
  int i,j,tmpsize;
  char cV='V',cN='N', cU='U';   
  int ONE=1;
  int zheev_lwork,zheev_info;
  _Complex double c1, c2, c3, tpone=1.0,tzero=0.0;
  double d1,d2,d3;
  double et1,et2;  /* timing variables */
  char evecs_filename[500];
  char howmny = 'P';
  FILE *evecs_fs=NULL;
  size_t evecs_count;
  WRITER *evecs_writer=NULL;
  spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL;
  paramsPropagatorFormat *evecs_propagatorFormat = NULL;
  void *evecs_io_buffer = NULL;

  int parallel;        /* for parallel processing of the scalar products */
#ifdef TM_USE_MPI
    parallel=1;
#else
    parallel=0;
#endif

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2; 

  /*(IN) Number of right-hand sides to be solved*/ 
  const int nrhs =   solver_params.arpackcg_nrhs; 
  /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ 
  const int nrhs1 =   solver_params.arpackcg_nrhs1;
  /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/
  const double eps_sq1 = solver_params.arpackcg_eps_sq1;
  /*(IN) suqared tolerance for restarting cg */
  const double res_eps_sq =   solver_params.arpackcg_res_eps_sq;

  /* parameters for arpack */

  /*(IN) number of eigenvectors to be computed by arpack*/
  const int nev = solver_params.arpackcg_nev;
   /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/
  const int ncv = solver_params.arpackcg_ncv;
  /*(IN) tolerance for computing eigenvalues with arpack */
  double arpack_eig_tol =   solver_params.arpackcg_eig_tol;
  /*(IN) maximum number of iterations to be used by arpack*/
  int arpack_eig_maxiter =   solver_params.arpackcg_eig_maxiter;
  /*(IN) 0 for eigenvalues with smallest real part "SR"
         1 for eigenvalues with largest real part "LR"
         2 for eigenvalues with smallest absolute value "SM"
         3 for eigenvalues with largest absolute value "LM"
         4 for eigenvalues with smallest imaginary part "SI"
         5 for eigenvalues with largest imaginary part  "LI"*/
  int kind =   solver_params.arpackcg_evals_kind;
  /*(IN) 0 don't compute the eiegnvalues and their residuals of the original system 
         1 compute the eigenvalues and the residuals for the original system (the orthonormal basis
           still be used in deflation and they are not overwritten).*/
  int comp_evecs =   solver_params.arpackcg_comp_evecs;
  /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/
  int acc =   solver_params.use_acc;
  /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/
  int cheb_k = solver_params.cheb_k;
  /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emin = solver_params.op_evmin;
  /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emax = solver_params.op_evmax;
  /*(IN) file name to be used for printing out debugging information from arpack*/
  char *arpack_logfile = solver_params.arpack_logfile;
  /*(IN) read eigenvectors in Schur basis from file */
  int  arpack_read_ev = solver_params.arpackcg_read_ev;
  /*(IN) write eigenvectors in Schur basis to file */
  int  arpack_write_ev = solver_params.arpackcg_write_ev;
  /*(IN) file name to be used for reading and writing evecs from and to disc */
  char *arpack_evecs_filename = solver_params.arpack_evecs_filename;
   /*(IN) precision used for writing eigenvectors */
  int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec;
  /* how to project with approximate eigenvectors */
  int projection_type = solver_params.projection_type;
  /* file format for evecs used by arpack */
  char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat; 

  /*-------------------------------------------------------------
    if this is the first right hand side, allocate memory, 
    call arpack, and compute resiudals of eigenvectors if needed
    -------------------------------------------------------------*/ 
  if(ncurRHS==0){ 
#if (defined SSE || defined SSE2 || defined SSE3)
    _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_ax==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n");
       exit(1);
    }
    else
       {ax  = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);}

    _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_r==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n");
       exit(1);
    }
    else
       {r  = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps1==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps1  = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps2==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps2  = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);}

#else
    ax = (spinor *) malloc(LDN*sizeof(spinor));
    r  = (spinor *) malloc(LDN*sizeof(spinor));
    tmps1 = (spinor *) malloc(LDN*sizeof(spinor));
    tmps2 = (spinor *) malloc(LDN*sizeof(spinor));
    
    if( (ax == NULL)  || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) )
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n");
       exit(1);
    }
#endif


    evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer  */
    evals = (_Complex double *) malloc(ncv*sizeof(_Complex double)); 
    tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double));

    if((evecs == NULL)  || (evals==NULL) || (tmpv1==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n");
       exit(1);
    }

    if ( arpack_read_ev == 1) {

      if (strcmp(arpack_evecs_fileformat, "partfile") == 0) {
        /* set evec filenmae */
        sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
        evecs_fs = fopen(evecs_filename, "r");
        if (evecs_fs == NULL) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename);
          return(-2);
        }
        fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename);

        if(arpack_evecs_writeprec == 64) {
 
          evecs_io_buffer = (void*)evecs;
   
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs);
          et2=gettime();
        
        } else {
          evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N );
          if( evecs_io_buffer == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
            return(-42);
          }
  
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs);
          et2=gettime();

          single2double(evecs, evecs_io_buffer, nev*24*N);

          free( evecs_io_buffer );
          evecs_io_buffer = NULL;
        }
       
        if( evecs_count != ((size_t)nev*12*N) ) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename);
          return(-3);
        }
        fclose(evecs_fs);
        evecs_fs = NULL;
        if(g_proc_id == g_stdio_proc) {
          fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1);
        }
      } else if(strcmp(arpack_evecs_fileformat, "single") == 0) {

        if(N==VOLUME) {
          for(i=0; i<nev; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
            evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
            evecs_ptr1 = NULL;
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        } else if(N==VOLUME/2) {
          for(i=0; i<nev/2; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
            evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
            evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        }
      }   /* of if arpack_evecs_fileformat */

      /* set info_arpack pro forma to SUCCESS */
      nconv = nev;
      info_arpack = 0;
    } else {
      et1=gettime();
      evals_arpack(N,nev,ncv,kind,howmny,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile);
      et2=gettime();

      if(info_arpack != 0){ /* arpack didn't converge */
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] WARNING: ARPACK didn't converge. exiting..\n");
        return -1;
      }
    
      if(g_proc_id == g_stdio_proc)
      {
         fprintf(stdout,"# [arpack_cg] ARPACK has computed %d eigenvectors\n",nconv);
         fprintf(stdout,"# [arpack_cg] ARPACK time: %+e\n",et2-et1);
      }

      if ( arpack_write_ev == 1) {

        if(strcmp(arpack_evecs_fileformat, "partfile") == 0 ) {

          if( g_cart_id == 0 ) fprintf(stdout, "# [arpack_cg] writing evecs in partfile format\n");
          /* set evec filenmae */
          sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);

          evecs_fs = fopen(evecs_filename, "w");
          if (evecs_fs == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename);
            return(-4);
          }
        
          if(arpack_evecs_writeprec == 64) {

            evecs_io_buffer = (void*)evecs;
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs);
            et2=gettime();

          } else {
            evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N );
            if( evecs_io_buffer == NULL) {
              fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
              return(-41);
            }
            double2single(evecs_io_buffer, evecs, nconv*24*N);
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs);
            et2=gettime();
            free(evecs_io_buffer);
            evecs_io_buffer = NULL;
          }
 
          if( evecs_count != ((size_t)nconv*12*N) ) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename);
            return(-5);
          }
          fclose(evecs_fs);
          evecs_fs = NULL;

          if(g_proc_id == g_stdio_proc) {
            fprintf(stdout,"[arpack_cg] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1);
          }

        } else if (strcmp(arpack_evecs_fileformat, "single") == 0) {

          if(N==VOLUME) {
            for(i=0; i<nconv; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
              evecs_ptr1 = NULL;
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1, 1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } else if(N==VOLUME/2) {
            for(i=0; i<nconv/2; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
              evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1,1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            }  /* end of loop on converged eigenvectors */
          }    /* end of if N == VOLUME */

        }      /* of if arpack_evecs_fileformat */

      }        /* end of if arpack_write_ev == 1 */

    }          /* end of if arpack_read_ev == 1 */

    H        = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    Hinv     = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); 
    IPIV     = (int *) malloc(nconv*sizeof(int));
    zheev_lwork = 3*nconv;
    zheev_work  = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double));
    zheev_rwork = (double *) malloc(3*nconv*sizeof(double));
    hevals      = (double *) malloc(nconv*sizeof(double));

    if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_lwork==NULL) || (zheev_rwork==NULL) || (hevals==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for H, Hinv, initwork, IPIV, zheev_lwork, zheev_rwork, hevals inside arpack_cg.\n");
       exit(1);
    }

    et1=gettime();
    /* compute the elements of the hermitian matrix H 
       leading dimension is nconv and active dimension is nconv */
    
    if( projection_type == 0) {
    
      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(r,&evecs[i*12*N],12*N);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        H[i+nconv*i] = creal(c1);  /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(r,&evecs[j*12*N],12*N);
          c1 = scalar_prod(r,ax,N,parallel);
          H[j+nconv*i] = c1;
          H[i+nconv*j] = conj(c1); /* enforce hermiticity */
        }
      }

    } else if ( projection_type == 1 )  {

      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
        f_final(r, tmps1);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        c2 = scalar_prod(r,r,N,parallel);
        H[i+nconv*i] = creal(c1) / creal(c2);   /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(tmps1, &evecs[j*12*N], 12*N);
          f_final(r, tmps1);
          c1 = scalar_prod(r,ax,N,parallel);
          c3 = scalar_prod(r, r, N, parallel);

          H[j+nconv*i] = c1 / sqrt( creal(c2) * creal(c3) );
          H[i+nconv*j] = conj(c1) / sqrt( creal(c2) * creal(c3) ); /* enforce hermiticity */
        }
      }
    }


    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute H: %+e\n",et2-et1);
    }

/*
    if(g_cart_id == 0) {
      for(i=0; i<nconv; i++) {
      for(j=0; j<nconv; j++) {
        fprintf(stdout, "# [arpack_cg] H[%d, %d] = %25.16e %25.16e\n", i, j, creal(H[i*nconv+j]), cimag(H[i*nconv+j]));
      }}
    }
*/



     et1=gettime();
     /* compute Ritz values and Ritz vectors if needed */
     if( (nconv>0) && (comp_evecs !=0))
     {
         HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
         if( HU==NULL ) {
           if(g_proc_id == g_stdio_proc)
             fprintf(stderr,"[arpack_cg] insufficient memory for HU inside arpack_cg\n");
             exit(2);
         }
         /* copy H into HU */
         tmpsize=nconv*nconv;
         _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE);

         /* compute eigenvalues and eigenvectors of HU*/
         /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */
         _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1);

         if(zheev_info != 0)
         {
	    if(g_proc_id == g_stdio_proc) 
	    {
	        fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info =  %d\n",zheev_info); 
                fflush(stderr);
	    }
	    exit(1);
         }

         /* If you want to replace the schur (orthonormal) basis by eigen basis
            use something like this. It is better to use the schur basis because
            they are better conditioned. Use this part only to get the eigenvalues
            and their resduals for the operator (D^\daggerD)
            esize=(ncv-nconv)*12*N;
            Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */

         /* compute residuals and print out results */

	 if(g_proc_id == g_stdio_proc)
	 {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n"); 
          fprintf(stdout,"# [arpack_cg] =============================================================\n");
          fflush(stdout);}

         for(i=0; i<nconv; i++)
         {
	    tmpsize=12*N;
            _FT(zgemv)(&cN,&tmpsize,&nconv,&tpone,evecs,&tmpsize,
		       &HU[i*nconv],&ONE,&tzero,tmpv1,&ONE,1);

            assign_complex_to_spinor(r,tmpv1,12*N);

            d1=square_norm(r,N,parallel);
            
            f(ax,r);

            mul_r(tmps1,hevals[i],r,N);

            diff(tmps2,ax,tmps1,N);
	    
	    d2= square_norm(tmps2,N,parallel);

            d3= sqrt(d2/d1);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"Eval[%06d]: %22.15E rnorm: %22.15E\n", i, hevals[i], d3); fflush(stdout);}
        } 
        free( HU ); HU = NULL;
     }  /* if( (nconv_arpack>0) && (comp_evecs !=0)) */
     et2=gettime();
     if(g_proc_id == g_stdio_proc) {
       fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1);
     }

  }  /* if(ncurRHS==0) */
    
  double eps_sq_used,restart_eps_sq_used;  /* tolerance squared for the linear system */

  double cur_res; /* current residual squared */

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  /* set the tolerance to be used for this right-hand side  */
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); 
    fflush(stdout);
  } 
  
  /*---------------------------------------------------------------*/
  /* Call init-CG until this right-hand side converges             */
  /*---------------------------------------------------------------*/
  double wt1,wt2,wE,wI;
  double normsq,tol_sq;
  int flag,maxit_remain,numIts,its;
  int info_lapack;

  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	  /* System has not converged yet */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  
  restart_eps_sq_used=res_eps_sq;

  while( flag == -1 )
  {
    
    if(nconv > 0)
    {


      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		   */
      /* --------------------------------------------------------- */
      wt1 = gettime();

      /*r0=b-Ax0*/
      f(ax,x); /*ax = A*x */
      diff(r,b,ax,N);  /* r=b-A*x */

      if( projection_type == 0) {

        /* x = x + evecs*inv(H)*evecs'*r */
        for(int i=0; i < nconv; i++)
        {
           assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
           initwork[i]= scalar_prod(tmps1,r,N,parallel);
        }

        /* solve the linear system H y = c */
        tmpsize=nconv*nconv;
        _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */
        /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
        _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack);

        if(info_lapack != 0)
        {
           if(g_proc_id == g_stdio_proc) {
              fprintf(stderr, "[arpack_cg] Error in ZGESV:, info =  %d\n",info_lapack); 
              fflush(stderr);
           }
           exit(1);
        }

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i<nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          assign_add_mul(x,tmps1,initwork[i],N);
        }

      } else if ( projection_type == 1 ) {
        /* x = x + evecs*inv(H)*evecs'*r */

        /* tmps2 = Q^+ r */
        f_initial(tmps2, r);

        for(int i=0; i < nconv; i++) {
          /* tmps1 = v_i */
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);

          /* initwork_i = v_i^+ Q^+ r / lambda_i^2 */
          initwork[i]= scalar_prod(tmps1, tmps2, N, parallel) / ( H[i*nconv+i] * H[i*nconv+i] );
        }

        memset(tmps2, 0, N*sizeof(spinor) );
        for(i=0; i<nconv; i++) {
          assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
          assign_add_mul(tmps2, tmps1, initwork[i], N);
        }

        /* apply final operator */
        f_final(tmps1, tmps2);
        assign_add_mul(x, tmps1, 1., N);

      }  /* end of if projection type */

      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      wI = wI + wt2-wt1;
      
    }/* if(nconv > 0) */


    /* which tolerance to use */
    if(eps_sq_used > restart_eps_sq_used)
    {
       tol_sq = eps_sq_used;
       flag   = 1; /* shouldn't restart again */
    }
    else
    {
       tol_sq = restart_eps_sq_used;
    }

    wt1 = gettime();
    its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); 
          
    wt2 = gettime();

    wE = wE + wt2-wt1;

    /* check convergence */
    if(its == -1)
    {
       /* cg didn't converge */
       if(g_proc_id == g_stdio_proc) {
         fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n"); 
         fflush(stderr);
         exit(1);
         
       }
    } 
    else
    {
       numIts += its;   
       maxit_remain = maxit - numIts; /* remaining number of iterations */
       restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */
    }
    
  }
  /* end while (flag ==-1)               */
  
  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(ax,x); /* ax= A*x */
  diff(r,b,ax,N);  /* r=b-A*x */	
  normsq=square_norm(r,N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "# [arpack_cg] For this rhs:\n");
    fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI);
    fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE);
    fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts); 
    fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys  : %+e\n",normsq);
  }


  /* free memory if this was your last system to solve */
  if(ncurRHS == nrhs){
#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) 
    free(_ax);  free(_r);  free(_tmps1); free(_tmps2);
#else
    free(ax); free(r); free(tmps1); free(tmps2);
#endif
    free(evecs); free(evals); free(H); free(Hinv);
    free(initwork); free(tmpv1); free(zheev_work);
    free(hevals); free(zheev_rwork); free(IPIV);
  }


  return numIts;
}
Example #7
0
int mrblk(spinor * const P, spinor * const Q,
	  const int max_iter, const double eps_sq,
	  const int rel_prec, const int N, 
	  matrix_mult_blk f, const int blk) {
  static int mr_init=0;
  int i = 0;
  double norm_r,beta;
  _Complex double alpha;
  spinor * r;
  const int parallel = 0;
  spinor * s[3];
  static spinor *s_=NULL;
  static int N_;

  if(mr_init == 0 || N != N_) {
    if(N!= N_ && mr_init != 0) {
      free(s_);
    }
    N_ = N;
    s_ = calloc(3*(N+1)+1, sizeof(spinor));
    mr_init = 1;
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  s[0] = (spinor *)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE); 
#else
  s[0] = s_;
#endif
  s[1] = s[0] + N + 1;
  s[2] = s[1] + N + 1;

  r = s[0];
  norm_r = square_norm(Q, N, parallel);
  
  zero_spinor_field(P, N);
  f(s[2], P, blk);
  diff(r, Q, s[2], N);
  norm_r = square_norm(r, N, parallel);
  if(g_proc_id == g_stdio_proc && g_debug_level > 2 && blk == 0) {
    printf("MRblk iteration= %d  |res|^2= %e\n", i, norm_r);
    fflush( stdout );
  }
  
  while((norm_r > eps_sq) && (i < max_iter)){
    i++;
    f(s[1], r, blk);
    alpha = scalar_prod(s[1], r, N, parallel);
    beta = square_norm(s[1], N, parallel);
    alpha /= beta;
    assign_add_mul(P, r, alpha, N);
    if(i%50 == 0) {
      f(s[2], P,blk);
    }
    else{
      assign_add_mul(s[2], s[1], alpha, N);
    }
    
    diff(r, Q, s[2], N);
    norm_r = square_norm(r, N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2 && blk == 0) {
      printf("MRblk iteration= %d  |res|^2= %g\n", i, norm_r);
      fflush(stdout);
    }
  }
  /* free(s_); */
  if(norm_r > eps_sq){
    return(-1);
  }
  return(i);
}
Example #8
0
int chrono_guess(spinor * const trial, spinor * const phi, spinor ** const v, int index_array[], 
		 const int _N, const int _n, const int V, matrix_mult f) {
  int info = 0;
  int i, j, N=_N, n=_n;
  _Complex double s;
  static int init_csg = 0;
  static _Complex double *bn = NULL;
  static _Complex double *G = NULL;
  int max_N = 20;

  if(N > 0) {
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: preparing  trial vector \n");
      fflush(stdout);
    }
    if(init_csg == 0) {
      init_csg = 1;
      bn = (_Complex double*) malloc(max_N*sizeof(_Complex double));
      G = (_Complex double*) malloc(max_N*max_N*sizeof(_Complex double));
    }

    /* Construct an orthogonal basis */
    for(j = n-1; j > n-2; j--) {
      for(i = j-1; i > -1; i--) {
	s = scalar_prod(v[index_array[j]], v[index_array[i]], V, 1);
	assign_diff_mul(v[index_array[i]], v[index_array[j]], s, V);
	if(g_debug_level > 2) {
	  s = scalar_prod(v[index_array[i]], v[index_array[j]], V, 1);
	  if(g_proc_id == 0) {
	    printf("CSG: <%d,%d> = %e +i %e \n", i, j, creal(s), cimag(s));fflush(stdout);
	  }
	}
      }
    }
    
    /* Generate "interaction matrix" V^\dagger f V */
    /* We assume that f is hermitian               */
    /* Generate also the right hand side           */
    
    for (j = 0; j < n; j++){
      f(trial, v[index_array[j]]);
      
      /* Only the upper triangular part is stored      */
      for(i = 0; i < j+1; i++){
	G[i*N + j] = scalar_prod(v[index_array[i]], trial, V, 1);  
	if(j != i) {
	  (G[j*N + i]) = conj(G[i*N + j]);
	}
	if(g_proc_id == 0 && g_debug_level > 2) {
	  printf("CSG: G[%d*N + %d]= %e + i %e  \n", i, j, creal(G[i*N + j]), cimag(G[i*N + j]));
	  fflush(stdout);
	}
      }
      /* The right hand side */
      bn[j] = scalar_prod(v[index_array[j]], phi, V, 1);  
    }
    
    /* Solver G y = bn for y and store it in bn */
    LUSolve(n, G, N, bn);

    /* Construct the new guess vector */
    if(info == 0) {
      mul(trial, bn[n-1], v[index_array[n-1]], V); 
      if(g_proc_id == 0 && g_debug_level > 2) {
	printf("CSG: bn[%d] = %f %f\n", index_array[n-1], creal(bn[index_array[n-1]]), cimag(bn[index_array[n-1]]));
      }
      for(i = n-2; i > -1; i--) {
	assign_add_mul(trial, v[index_array[i]], bn[i], V);
	if(g_proc_id == 0 && g_debug_level > 2) {
	  printf("CSG: bn[%d] = %f %f\n", index_array[i], creal(bn[index_array[i]]), cimag(bn[index_array[i]]));
	}
      }
    }
    else {
      assign(trial, phi, V);
    }

    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: done! n= %d N=%d \n", n, N);fflush(stdout);
    }
  }
  else {
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: using zero trial vector \n");
      fflush(stdout);
    }
    zero_spinor_field(trial, V);
  }

  return(info);
}
Example #9
0
// computes ||(1 - C^dagger R C) phi||
void check_C_ndpsi(spinor * const k_up, spinor * const k_dn,
		   spinor * const l_up, spinor * const l_dn,
		   const int id, hamiltonian_field_t * const hf,
		   solver_params_t * solver_params) {
  monomial * mnl = &monomial_list[id];
  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
			     l_up, l_dn, solver_params);

  assign(k_up, l_up, VOLUME/2);
  assign(k_dn, l_dn, VOLUME/2);

  // apply C to the random field to generate pseudo-fermion fields
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 - i nu_j
    // this needs phmc_Cpol = 1 to work!
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			       g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			       I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
  }
  //apply R
  solver_params->shifts = mnl->rat.mu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn,
	       solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }
  // apply C^dagger
  solver_params->shifts = mnl->rat.nu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn, solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 + i nu_j
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
  }
  diff(k_up, k_up, l_up, VOLUME/2);  
  diff(k_dn, k_dn, l_dn, VOLUME/2);  
  double resi = square_norm(k_up, VOLUME/2, 1);
  resi += square_norm(k_dn, VOLUME/2, 1);
  if(g_proc_id == 0) printf("|| (1-C^dagger R C)*phi|| = %e\n", resi);

  return;
}
Example #10
0
File: gcr.c Project: annube/tmLQCD
int gcr(spinor * const P, spinor * const Q, 
	const int m, const int max_restarts,
	const double eps_sq, const int rel_prec,
	const int N, const int precon, matrix_mult f) {

  int k, l, restart, i, iter = 0;
  double norm_sq, err;
  spinor * rho, * tmp;
  complex ctmp;
  spinor ** solver_field = NULL;
  const int nr_sf = 2;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  rho = solver_field[0];
  tmp = solver_field[1];

  init_gcr(m, N+RAND);

  norm_sq = square_norm(Q, N, 1);
  if(norm_sq < 1.e-32) {
    norm_sq = 1.;
  }
  
  for(restart = 0; restart < max_restarts; restart++) {
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(rho, Q, tmp, N);
    err = square_norm(rho, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2){
      printf("GCR: iteration number: %d, true residue: %g\n", iter, err); 
      fflush(stdout);
    }
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }
    for(k = 0; k < m; k++) {
      
      if(precon == 0) {
	assign(xi[k], rho, N);
      }
      else {
        zero_spinor_field(xi[k], N);  
        Msap_eo(xi[k], rho, 6);   
 	/* Msap(xi[k], rho, 8); */
      }
	  
      dfl_sloppy_prec = 1;
      dfl_little_D_prec = 1.e-12;
      f(tmp, xi[k]); 
	  
      /* tmp will become chi[k] */
      for(l = 0; l < k; l++) {
        a[l][k] = scalar_prod(chi[l], tmp, N, 1);
        assign_diff_mul(tmp, chi[l], a[l][k], N);
      }
      b[k] = sqrt(square_norm(tmp, N, 1));
      mul_r(chi[k], 1./b[k], tmp, N);
      c[k] = scalar_prod(chi[k], rho, N, 1);
      assign_diff_mul(rho, chi[k], c[k], N);
      err = square_norm(rho, N, 1);
      iter ++;
      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        if(rel_prec == 1) printf("# GCR: %d\t%g >= %g iterated residue\n", iter, err, eps_sq*norm_sq); 
        else printf("# GCR: %d\t%g >= %giterated residue\n", iter, err, eps_sq);
        fflush(stdout);
      }
      /* Precision reached? */
      if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        break;
      }
    }

    /* prepare for restart */
    _mult_real(c[k], c[k], 1./b[k]);
    assign_add_mul(P, xi[k], c[k], N);
    for(l = k-1; l >= 0; l--) {
      for(i = l+1; i <= k; i++) {
        _mult_assign_complex(ctmp, a[l][i], c[i]);
        /* c[l] -= ctmp */
        _diff_complex(c[l], ctmp);
      }
      _mult_real(c[l], c[l], 1./b[l]);
      assign_add_mul(P, xi[l], c[l], N);
    }
  }
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Example #11
0
int gmres(spinor * const P,spinor * const Q, 
	  const int m, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, const int parallel, matrix_mult f){

  int restart, i, j, k;
  double beta, eps, norm;
  complex tmp1, tmp2;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  eps=sqrt(eps_sq);
  init_gmres(m, VOLUMEPLUSRAND);

  norm = sqrt(square_norm(Q, N, parallel));

  assign(solver_field[2], P, N);
  for(restart = 0; restart < max_restarts; restart++){
    /* r_0=Q-AP  (b=Q, x+0=P) */
    f(solver_field[0], solver_field[2]);
    diff(solver_field[0], Q, solver_field[0], N);

    /* v_0=r_0/||r_0|| */
    alpha[0].re=sqrt(square_norm(solver_field[0], N, parallel));

    if(g_proc_id == g_stdio_proc && g_debug_level > 1){
      printf("%d\t%g true residue\n", restart*m, alpha[0].re*alpha[0].re); 
      fflush(stdout);
    }

    if(alpha[0].re==0.){
      assign(P, solver_field[2], N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    mul_r(V[0], 1./alpha[0].re, solver_field[0], N);

    for(j = 0; j < m; j++){
      /* solver_field[0]=A*v_j */

      f(solver_field[0], V[j]);

      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
      assign(solver_field[1], solver_field[0], N);
      for(i = 0; i <= j; i++){
	H[i][j] = scalar_prod(V[i], solver_field[1], N, parallel);
	assign_diff_mul(solver_field[1], V[i], H[i][j], N);
      }

      _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, parallel)), 0.);
      for(i = 0; i < j; i++){
	tmp1 = H[i][j];
	tmp2 = H[i+1][j];
	_mult_real(H[i][j], tmp2, s[i]);
	_add_assign_complex_conj(H[i][j], c[i], tmp1);
	_mult_real(H[i+1][j], tmp1, s[i]);
	_diff_assign_complex(H[i+1][j], c[i], tmp2);
      }

      /* Set beta, s, c, alpha[j],[j+1] */
      beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
      s[j] = H[j+1][j].re / beta;
      _mult_real(c[j], H[j][j], 1./beta);
      _complex_set(H[j][j], beta, 0.);
      _mult_real(alpha[j+1], alpha[j], s[j]);
      tmp1 = alpha[j];
      _mult_assign_complex_conj(alpha[j], c[j], tmp1);

      /* precision reached? */
      if(g_proc_id == g_stdio_proc && g_debug_level > 1){
	printf("%d\t%g residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); 
	fflush(stdout);
      }
      if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
	_mult_real(alpha[j], alpha[j], 1./H[j][j].re);
	assign_add_mul(solver_field[2], V[j], alpha[j], N);
	for(i = j-1; i >= 0; i--){
	  for(k = i+1; k <= j; k++){
 	    _mult_assign_complex(tmp1, H[i][k], alpha[k]); 
	    _diff_complex(alpha[i], tmp1);
	  }
	  _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
	  assign_add_mul(solver_field[2], V[i], alpha[i], N);
	}
	for(i = 0; i < m; i++){
	  alpha[i].im = 0.;
	}
	assign(P, solver_field[2], N);
	finalize_solver(solver_field, nr_sf);
	return(restart*m+j);
      }
      /* if not */
      else{
	if(j != m-1){
	  mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N);
	}
      }

    }
    j=m-1;
    /* prepare for restart */
    _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
    assign_add_mul(solver_field[2], V[j], alpha[j], N);
    for(i = j-1; i >= 0; i--){
      for(k = i+1; k <= j; k++){
	_mult_assign_complex(tmp1, H[i][k], alpha[k]);
	_diff_complex(alpha[i], tmp1);
      }
      _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
      assign_add_mul(solver_field[2], V[i], alpha[i], N);
    }
    for(i = 0; i < m; i++){
      alpha[i].im = 0.;
    }
  }

  /* If maximal number of restarts is reached */
  assign(P, solver_field[2], N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Example #12
0
void rat_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  solver_pm_t solver_pm;
  double atime, etime, dummy;
  atime = gettime();
  // only for non-twisted operators
  g_mu = 0.;
  g_mu3 = 0.;
  boundary(mnl->kappa);

  mnl->iter1 = 0;
  g_mu3 = 0.;
  if(mnl->type == CLOVERRAT) {
    g_c_sw = mnl->c_sw;
    init_sw_fields();
    sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert(EE, 0.);
  }
  // we measure before the trajectory!
  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
    //if(mnl->type != CLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
    //else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
  }

  // the Gaussian distributed random fields
  mnl->energy0 = 0.;
  random_spinor_field_eo(mnl->pf, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 = square_norm(mnl->pf, VOLUME/2, 1);

  // set solver parameters
  solver_pm.max_iter = mnl->maxiter;
  solver_pm.squared_solver_prec = mnl->accprec;
  solver_pm.no_shifts = mnl->rat.np;
  solver_pm.shifts = mnl->rat.nu;
  solver_pm.type = CGMMS;
  solver_pm.M_psi = mnl->Qsq;
  solver_pm.sdim = VOLUME/2;
  solver_pm.rel_prec = g_relative_precision_flag;
  mnl->iter0 = cg_mms_tm(g_chi_up_spinor_field, mnl->pf,
			 &solver_pm, &dummy);

  assign(mnl->w_fields[2], mnl->pf, VOLUME/2);

  // apply C to the random field to generate pseudo-fermion fields
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q - i nu_j (not twisted mass term, so Qp=Qm=Q
    mnl->Qp(g_chi_up_spinor_field[mnl->rat.np], g_chi_up_spinor_field[j]);
    assign_add_mul(g_chi_up_spinor_field[mnl->rat.np], g_chi_up_spinor_field[j], -I*mnl->rat.nu[j], VOLUME/2);
    assign_add_mul(mnl->pf, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
  }

  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial heatbath: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) { 
      printf("called rat_heatbath for id %d energy %f\n", id, mnl->energy0);
    }
  }
  return;
}
Example #13
0
int gmres_dr(spinor * const P,spinor * const Q, 
	  const int m, const int nr_ev, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, matrix_mult f){

  int restart=0, i, j, k, l;
  double beta, eps, norm, beta2=0.;
  complex *lswork = NULL;
  int lwork;
  complex tmp1, tmp2;
  int info=0;
  int _m = m, mp1 = m+1, np1 = nr_ev+1, ne = nr_ev, V2 = 12*(VOLUMEPLUSRAND)/2, _N = 12*N;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  double err=0.;
  spinor * r0, * x0;

  cmone.re = -1.; cmone.im=0.;
  cpone.re = 1.; cpone.im=0.;
  czero.re = 0.; czero.im = 0.;
  
  r0 = solver_field[0];
  x0 = solver_field[2];
  eps=sqrt(eps_sq);  
  init_gmres_dr(m, (VOLUMEPLUSRAND));
  norm = sqrt(square_norm(Q, N, 1));

  assign(x0, P, N);

  /* first normal GMRES cycle */
  /* r_0=Q-AP  (b=Q, x+0=P) */
  f(r0, x0);
  diff(r0, Q, r0, N);
  
  /* v_0=r_0/||r_0|| */
  alpha[0].re=sqrt(square_norm(r0, N, 1));
  err = alpha[0].re;
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0){
    printf("%d\t%e true residue\n", restart*m, alpha[0].re*alpha[0].re); 
    fflush(stdout);
  }
  
  if(alpha[0].re==0.){
    assign(P, x0, N);
    finalize_solver(solver_field, nr_sf);
    return(restart*m);
  }
  
  mul_r(V[0], 1./alpha[0].re, r0, N);
  
  for(j = 0; j < m; j++){
    /* solver_field[0]=A*v_j */

    /* Set h_ij and omega_j */
    /* solver_field[1] <- omega_j */    
    f(solver_field[1], V[j]);
/*     assign(solver_field[1], solver_field[0], N); */
    for(i = 0; i <= j; i++){
      H[i][j] = scalar_prod(V[i], solver_field[1], N, 1);
      /* G, work and work2 are in Fortran storage: columns first */
      G[j][i] = H[i][j];
      work2[j][i] = H[i][j];
      work[i][j].re = H[i][j].re;
      work[i][j].im = -H[i][j].im;
      assign_diff_mul(solver_field[1], V[i], H[i][j], N);
    }
    
    _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.);
    G[j][j+1] = H[j+1][j];
    work2[j][j+1] = H[j+1][j];
    work[j+1][j].re =  H[j+1][j].re;
    work[j+1][j].im =  -H[j+1][j].im;
    beta2 = H[j+1][j].re*H[j+1][j].re; 
    for(i = 0; i < j; i++){
      tmp1 = H[i][j];
      tmp2 = H[i+1][j];
      _mult_real(H[i][j], tmp2, s[i]);
      _add_assign_complex_conj(H[i][j], c[i], tmp1);
      _mult_real(H[i+1][j], tmp1, s[i]);
      _diff_assign_complex(H[i+1][j], c[i], tmp2);
    }
    
    /* Set beta, s, c, alpha[j],[j+1] */
    beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
    s[j] = H[j+1][j].re / beta;
    _mult_real(c[j], H[j][j], 1./beta);
    _complex_set(H[j][j], beta, 0.);
    _mult_real(alpha[j+1], alpha[j], s[j]);
    tmp1 = alpha[j];
    _mult_assign_complex_conj(alpha[j], c[j], tmp1);
    
    /* precision reached? */
    if(g_proc_id == g_stdio_proc && g_debug_level > 0){
      printf("%d\t%e residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); 
      fflush(stdout);
    }
    if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
      _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
      assign_add_mul(x0, V[j], alpha[j], N);
      for(i = j-1; i >= 0; i--){
	for(k = i+1; k <= j; k++){
	  _mult_assign_complex(tmp1, H[i][k], alpha[k]); 
	  /* alpha[i] -= tmp1 */
	  _diff_complex(alpha[i], tmp1);
	}
	_mult_real(alpha[i], alpha[i], 1./H[i][i].re);
	assign_add_mul(x0, V[i], alpha[i], N);
      }
      for(i = 0; i < m; i++){
	alpha[i].im = 0.;
      }
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m+j);
    }
    /* if not */
    else {
      mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N); 
    }
    
  }
  j=m-1;
  /* prepare for restart */
  _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
  assign_add_mul(x0, V[j], alpha[j], N);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("alpha: %e %e\n", alpha[j].re, alpha[j].im);
  }
  for(i = j-1; i >= 0; i--){
    for(k = i+1; k <= j; k++){
      _mult_assign_complex(tmp1, H[i][k], alpha[k]);
      _diff_complex(alpha[i], tmp1);
    }
    _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
    }
    assign_add_mul(x0, V[i], alpha[i], N);
  }

  /* This produces c=V_m+1*r0 */
  for(i = 0; i < mp1; i++) {
    c[i] = scalar_prod(V[i], r0, N, 1); 
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
    }
  }

  for(restart = 1; restart < max_restarts; restart++) {  

    /* compute c-\bar H \alpha */
    _FT(zgemv) ("N", &mp1, &_m, &cmone, G[0], &mp1, alpha, &one, &cpone, c, &one, 1);
    err = sqrt(short_scalar_prod(c, c, mp1).re);
    if(g_proc_id == 0 && g_debug_level > 0) {
      printf("%d\t %e short residue\n", m*restart, err*err);
    } 
    
    /* Compute new residual r0 */
    /* r_0=Q-AP  (b=Q, x+0=P) */
    if(g_debug_level > 0) {
      f(r0, x0);
      diff(r0, Q, r0, N);
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
	printf("%d\t%e true residue\n", m*restart, tmp1.im*tmp1.im); 
	fflush(stdout);
      }
    }
    mul(r0, c[0], V[0], N);
    for(i = 1; i < mp1; i++) {
      assign_add_mul(r0, V[i], c[i], N);
    } 
    if(g_debug_level > 3) {
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
	printf("%d\t%e residue\n", m*restart, tmp1.im*tmp1.im); 
	fflush(stdout);
      }
    }
    /* Stop if satisfied */
    if(err < eps){
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    /* Prepare to compute harmonic Ritz pairs */
    for(i = 0; i < m-1; i++){
      alpha[i].re = 0.;
      alpha[i].im = 0.;
    }
    alpha[m-1].re = 1.;
    alpha[m-1].im = 0.;
    _FT(zgesv) (&_m, &one, work[0], &mp1, idx, alpha, &_m, &info); 
    for(i = 0; i < m; i++) {
      G[m-1][i].re += (beta2*alpha[idx[i]-1].re);
      G[m-1][i].im += (beta2*alpha[idx[i]-1].im);
    }
    if(g_proc_id == 0 && g_debug_level > 3){
      printf("zgesv returned info = %d, c[m-1]= %e, %e , idx[m-1]=%d\n", 
	     info, alpha[idx[m-1]-1].re, alpha[idx[m-1]-1].im, idx[m-1]);
    }
    /* c - \bar H * d -> c */
    /* G contains H + \beta^2 H^-He_n e_n^H */

    /* Compute harmonic Ritz pairs */
    diagonalise_general_matrix(m, G[0], mp1, alpha, evalues);
    for(i = 0; i < m; i++) {
      sortarray[i] = _complex_square_norm(evalues[i]);
      idx[i] = i;
    }
    quicksort(m, sortarray, idx);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      for(i = 0; i < m; i++) {
	printf("# Evalues %d %e  %e \n", i, evalues[idx[i]].re, evalues[idx[i]].im);
      }
      fflush(stdout);
    }
    
    /* Copy the first nr_ev eigenvectors to work */
    for(i = 0; i < ne; i++) {
      for(l = 0; l < m; l++) {
	work[i][l] = G[idx[i]][l];
      }
    }
    /* Orthonormalize them */
    for(i = 0; i < ne; i++) {
      work[i][m].re = 0.;
      work[i][m].im = 0.;
      short_ModifiedGS(work[i], m, i, work[0], mp1); 
    }
    /* Orthonormalize c - \bar H d to work */
    short_ModifiedGS(c, m+1, ne, work[0], mp1);
    for(i = 0; i < mp1; i++) {
      work[nr_ev][i] = c[i];
    }
    /* Now compute \bar H = P^T_k+1 \bar H_m P_k */
    for(i = 0; i < mp1; i++) {
      for(l = 0; l < mp1; l++) {
	H[i][l].re = 0.;
	H[i][l].im = 0.;
      }
    }    

    _FT(zgemm) ("N", "N", &mp1, &ne, &_m, &cpone, work2[0], &mp1, work[0], &mp1, &czero, G[0], &mp1, 1, 1); 
    _FT(zgemm) ("C", "N", &np1, &ne , &mp1, &cpone, work[0], &mp1, G[0], &mp1, &czero, H[0], &mp1, 1, 1);

    if(g_debug_level > 3) {
      for(i = 0; i < ne+1; i++) {
	for(l = 0; l < ne+1; l++) {
	  if(g_proc_id == 0) {
	    printf("(g[%d], g[%d]) = %e, %e\n", i, l, short_scalar_prod(work[i], work[l], m+1).re, 
		   short_scalar_prod(work[i], work[l], m+1).im);
	    printf("(g[%d], g[%d]) = %e, %e\n", l, i, short_scalar_prod(work[l], work[i], m+1).re, 
		   short_scalar_prod(work[l], work[i], m+1).im);
	  }
	}
      }
    }
    /* V_k+1 = V_m+1 P_k+1 */
/*     _FT(zgemm) ("N", "N", &_N, &np1, &mp1, &cpone, (complex*)V[0], &V2, work[0], &mp1, &czero, (complex*)Z[0], &V2, 1, 1);  */
    for(l = 0; l < np1; l++) {
      mul(Z[l], work[l][0], V[0], N);
      for(i = 1; i < mp1; i++) {
	assign_add_mul(Z[l], V[i], work[l][i], N);
      }
    }
    /* copy back to V */
    for(i = 0; i < np1; i++) {
      assign(V[i], Z[i], N); 
    }
    /* Reorthogonalise v_nr_ev */
    ModifiedGS((complex*)V[nr_ev], _N, nr_ev, (complex*)V[0], V2);  
    if(g_debug_level > 3) {
      for(i = 0; i < np1; i++) {
	for(l = 0; l < np1; l++) {
	  tmp1 = scalar_prod(V[l], V[i], N, 1);
	  if(g_proc_id == 0) {
	    printf("(V[%d], V[%d]) = %e %e %d %d %d %d %d %d %e %e\n", l, i, tmp1.re, tmp1.im, np1, mp1, ne, _m, _N, V2, H[l][i].re, H[l][i].im);
	  }
	}
      }
    }
    /* Copy the content of H to work, work2 and G */
    for(i=0; i < mp1; i++) { 
      for(l = 0; l < mp1; l++) { 
 	G[i][l] = H[i][l];
	work2[i][l] = H[i][l];
	work[l][i].re = H[i][l].re;
	work[l][i].im = -H[i][l].im;
      }
    }

    for(j = ne; j < m; j++) {
      /* solver_field[0]=A*v_j */
      f(solver_field[1], V[j]);
      
      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
/*       assign(solver_field[1], solver_field[0], N); */
      for(i = 0; i <= j; i++){
	H[j][i] = scalar_prod(V[i], solver_field[1], N, 1);  
	/* H, G, work and work2 are now all in Fortran storage: columns first */
	G[j][i] = H[j][i];
	work2[j][i] = H[j][i];
	work[i][j].re = H[j][i].re;
	work[i][j].im = -H[j][i].im;
	assign_diff_mul(solver_field[1], V[i], H[j][i], N);
      }
      beta2 = square_norm(solver_field[1], N, 1);
      _complex_set(H[j][j+1], sqrt(beta2), 0.);
      G[j][j+1] = H[j][j+1];
      work2[j][j+1] = H[j][j+1];
      work[j+1][j].re =  H[j][j+1].re;
      work[j+1][j].im =  -H[j][j+1].im;
      mul_r(V[(j+1)], 1./H[j][j+1].re, solver_field[1], N);
    }

    /* Solve the least square problem for alpha*/
    /* This produces c=V_m+1*r0 */
    for(i = 0; i < mp1; i++) {      
      c[i] = scalar_prod(V[i], r0, N, 1);  
      alpha[i] = c[i];
      if(g_proc_id == 0 && g_debug_level > 3) {
	printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
      }
    }
    if(lswork == NULL) {
      lwork = -1;
      _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, &tmp1, &lwork, &info, 1);
      lwork = (int)tmp1.re;
      lswork = (complex*)malloc(lwork*sizeof(complex));
    }
    _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, lswork, &lwork, &info, 1);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("zgels returned info = %d\n", info);
      fflush(stdout);
    }
    /* Compute the new solution vector */
    for(i = 0; i < m; i++){
      if(g_proc_id == 0 && g_debug_level > 3) {
	printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
      }
      assign_add_mul(x0, V[i], alpha[i], N);
    }
  }


  /* If maximal number of restart is reached */
  assign(P, x0, N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Example #14
0
int cr(spinor * const P, spinor * const Q, 
        const int m, const int max_restarts,
        const double eps_sq, const int rel_prec,
        const int N, const int precon, matrix_mult f) {

    int k, l, restart, i, iter = 0;
    double norm_sq, err;
    spinor * xi, * Axi, * chi, * Achi, *tmp;
    _Complex double alpha, beta;
    static _Complex double one = 1.0;
    double norm, rAr, newrAr;
    double atime, etime;
    spinor ** solver_field = NULL;
    const int nr_sf = 5;
    int save_sloppy = g_sloppy_precision;

    if(N == VOLUME) {
        init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    }
    else {
        init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    }

    atime = gettime();

    xi = solver_field[0];
    Axi = solver_field[1];
    chi = solver_field[2];
    Achi = solver_field[3];
    tmp = solver_field[4];

    norm_sq = square_norm(Q, N, 1);
    if(norm_sq < 1.e-32) {
        norm_sq = 1.;
    }

    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(chi, Q, tmp, N);
    assign(xi, chi, N);
    f(Axi, xi);
    f(Achi, chi);
    rAr = scalar_prod(chi, Achi, N, 1);
    err = square_norm(chi, N, 1);
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        finalize_solver(solver_field, nr_sf);
        return(iter);
    }
    

    for(k = 0; k < m; k++) {

        dfl_sloppy_prec = 1;

        norm = square_norm(Axi, N, 1);
        alpha = rAr/norm;
        assign_add_mul(P, xi, alpha, N);
        /* get the new residual */
        assign_diff_mul(chi, Axi, alpha, N);

        err = square_norm(chi, N, 1);
        iter ++;
        etime = gettime();
        if(g_proc_id == g_stdio_proc && g_debug_level > 3){
            printf("# CR: %d\t%g iterated residue, time spent %f s\n", iter, err, (etime - atime)); 
            fflush(stdout);
        }
        /* Precision reached? */
        if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
            break;
        }

#ifdef _USE_HALFSPINOR
        if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*norm_sq) && (rel_prec == 1))) {
            if (g_sloppy_precision_flag == 1) {
                g_sloppy_precision = 1;
                if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
                    printf("sloppy precision on\n"); fflush( stdout);
                }
            }
        }
#endif

        f(Achi, chi); 

        newrAr = scalar_prod(chi, Achi, N, 1); 
        beta = newrAr/rAr;
        assign_mul_add_mul(xi, beta, chi, one, N);
        assign_mul_add_mul(Axi,beta, Achi, one, N);
        rAr = newrAr;

    }

    g_sloppy_precision = save_sloppy;
    finalize_solver(solver_field, nr_sf);
    return(-1);
}
Example #15
0
int incr_eigcg(const int N, const int nrhs,  const int nrhs1, spinor * const x, spinor * const b, 
               const int ldh, matrix_mult f, const double eps_sq1, const double eps_sq, double restart_eps_sq,  
               const int rand_guess_opt, const int rel_prec, const int maxit, int nev, const int v_max) 
{ 
  /*Static variables and arrays.*/
  static spinor **solver_field; /*4 spinor fields*/

  static int ncurEvals=0;       /* current number of stored eigenvectors */
  static int ncurRHS=0;         /* current number of the system being solved */                   

  static spinor **evecs;        /* accumulated eigenvectors for deflation. */

  static void *_evals;
  static double *evals;         /* Ritz values */

  static void *_v;
  static spinor  *V;            /* work array for eigenvector search basis in eigCG */

  static void *_h;
  static _Complex double  *H;            /* The ncurEvals^2 matrix: H=evecs'*A*evecs */ 

  static void *_hu;
  static _Complex double  *HU;           /* used for diagonalization of H if eigenvalues requested
                                   also used as a copy of H if needed*/
  static void *_initwork;                            
  static _Complex double  *initwork;     /* vector of size ldh using with init-CG */ 

  static void *_ework;
  static _Complex double  *ework;
  /* end of the thinking part */

  static void *_work;
  static _Complex double  *work;

  static void *_rwork;
  static double *rwork;
  
  static void *_IPIV;
  static int *IPIV;        /*integer array to store permutations when solving the small linear system*/

  /* some constants */
  char cU='U'; char cN='N';  char cV='V'; 
  _Complex double  tpone= 1.0e+00;
  _Complex double  tzero= 0.0e+00;
  //tpone.re=+1.0e+00; tpone.im=0.0e+00; 
  //tzero.re=+0.0e+00; tzero.im=0.0e+00;

  /* Timing vars */
  double wt1,wt2,wE,wI;

  double eps_sq_used;

 
  /* Variables */
  double machEps = 1e-15;  
  double normb, normsq, tmpd,tmpd2;
  _Complex double  tempz;
  int i,j, ONE = 1;
  int tmpsize,tmpi,info=0;
  int numIts, flag, nAdded, nev_used;
  int maxit_remain;
  int esize,nrsf;

  int parallel; /* for parallel processing of the scalar products */

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2;
  
 
  #ifdef MPI
    parallel=1;
  #else
    parallel=0;
  #endif
 
  /*think more about this */
  esize=2*12*N+4*nev*nev;  /* fixed size for ework used for restarting in eigcg*/

  nrsf=4;  /*number of solver fields */
  
  int lwork=3*ldh;

  double cur_res; //current residual squared (initial value will be computed in eigcg)

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  //set the tolerance to be used for this right-hand side 
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }

  if(ncurRHS==1)/* If this is the first system, allocate needed memory for the solver*/
  {
    init_solver_field(&solver_field, LDN, nrsf); 
  }

  if(nev==0){ /*incremental eigcg is used as a cg solver. No need to restart forcing no-restart*/
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
       fprintf(stdout, "CG won't be restarted in this mode since no deflation will take place (nev=0)\n"); 
       fflush(stdout);
    } 
  
    restart_eps_sq=0.0;
  }




  if((ncurRHS==1) && (nev >0) )/* If this is the first right-hand side and eigenvectors are needed, allocate needed memory*/
  { 
    init_solver_field(&evecs, LDN, ldh); 
     
    #if (defined SSE || defined SSE2 || defined SSE3)

    /*Extra elements are needed for allignment */
    //_v = malloc(LDN*v_max*sizeof(spinor)+ALIGN_BASE);
    _v = calloc(LDN*v_max+ALIGN_BASE,sizeof(spinor));
    V  = (spinor *)(((unsigned long int)(_v)+ALIGN_BASE)&~ALIGN_BASE);

    //_h=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _h=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    H = (_Complex double  *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
 
    //_hu=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _hu=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    HU = (_Complex double  *)(((unsigned long int)(_hu)+ALIGN_BASE)&~ALIGN_BASE);
    
    //_ework = malloc(esize*sizeof(_Complex double )+ALIGN_BASE);
    _ework = calloc(esize+ALIGN_BASE,sizeof(_Complex double ));
    ework=(_Complex double  *)(((unsigned long int)(_ework)+ALIGN_BASE)&~ALIGN_BASE);

    //_initwork = malloc(ldh*sizeof(_Complex double )+ALIGN_BASE);
    _initwork = calloc(ldh+ALIGN_BASE,sizeof(_Complex double ));
    initwork = (_Complex double  *)(((unsigned long int)(_initwork)+ALIGN_BASE)&~ALIGN_BASE);

    //_work = malloc(lwork*sizeof(_Complex double )+ALIGN_BASE);
    _work = calloc(lwork+ALIGN_BASE,sizeof(_Complex double ));
    work = (_Complex double  *)(((unsigned long int)(_work)+ALIGN_BASE)&~ALIGN_BASE);

    //_rwork = malloc(3*ldh*sizeof(double)+ALIGN_BASE);
    _rwork = calloc(3*ldh+ALIGN_BASE,sizeof(double));
    rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);

    
    //_IPIV = malloc(ldh*sizeof(int)+ALIGN_BASE);
    _IPIV = calloc(ldh+ALIGN_BASE,sizeof(int));
    IPIV = (int *)(((unsigned long int)(_IPIV)+ALIGN_BASE)&~ALIGN_BASE);

    //_evals = malloc(ldh*sizeof(double)+ALIGN_BASE);
    _evals = calloc(ldh+ALIGN_BASE,sizeof(double)); 
    evals = (double *)(((unsigned long int)(_evals)+ALIGN_BASE)&~ALIGN_BASE);


    #else

    V = (spinor *) calloc(LDN*v_max,sizeof(spinor));
    H = calloc(ldh*ldh, sizeof(_Complex double ));
    HU= calloc(ldh*ldh, sizeof(_Complex double ));
    initwork = calloc(ldh, sizeof(_Complex double ));
    ework = calloc(esize, sizeof(_Complex double ));
    work = calloc(lwork,sizeof(_Complex double ));
    rwork= calloc(3*ldh,sizeof(double));
    IPIV = calloc(ldh, sizeof(int));
    evals = (double *) calloc(ldh, sizeof(double));

    #endif
    
  } /*if(ncurRHS==1)*/

  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "System %d, eps_sq %e\n",ncurRHS,eps_sq_used); 
    fflush(stdout);
  } 
  
     /*---------------------------------------------------------------*/
     /* Call eigCG until this right-hand side converges               */
     /*---------------------------------------------------------------*/
  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	   /* First time through. Run eigCG regularly */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  

  while( flag == -1 || flag == 3)
  {
    //if(g_proc_id==g_stdio_proc)
      //printf("flag= %d, ncurEvals= %d\n",flag,ncurEvals);
    
    if(ncurEvals > 0)
    {
      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		     */
      /* --------------------------------------------------------- */

      wt1 = gettime();

      /*r0=b-Ax0*/
      normsq = square_norm(x,N,parallel);
      if(normsq>0.0)
      {
	f(solver_field[0],x); /* solver_field[0]= A*x */
	diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */		
      }
      else
	assign(solver_field[1],b,N); /* solver_field[1]=b */
	
      /* apply the deflation using init-CG */
      /* evecs'*(b-Ax) */
      for(i=0; i<ncurEvals; i++)
      {
        initwork[i]= scalar_prod(evecs[i],solver_field[1],N,parallel);
      }
    
      /* solve the linear system H y = c */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE); /* copy H into HU */
      _FT(zgesv) (&ncurEvals,&ONE,HU,&ldh,IPIV,initwork,&ldh,&info);

      if(info != 0)
      {
         if(g_proc_id == g_stdio_proc) {
            fprintf(stderr, "Error in ZGESV:, info =  %d\n",info); 
            fflush(stderr);
         }
         exit(1);
      }
    
      /* x = x + evecs*inv(H)*evecs'*r */
      for(i=0; i<ncurEvals; i++)
      {
        assign_add_mul(x,evecs[i],initwork[i],N);
      }
      
      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      
      wI = wI + wt2-wt1;
      
    }/* if(ncurEvals > 0) */


    /* ------------------------------------------------------------ */
    /* Adjust nev for eigcg according to available ldh/restart      */
    /* ------------------------------------------------------------ */
	  
    if (flag == 3) { /* restart with the same rhs, set nev_used = 0 */
      nev_used = 0;
      /* if convergence seems before next restart do not restart again */
      if(rel_prec)
      {
	       if (cur_res*(restart_eps_sq) < eps_sq*normb*normb) 
	           restart_eps_sq=0.0;
      }
      else
      {
	       if (cur_res*(restart_eps_sq) < eps_sq) 
	          restart_eps_sq=0.0;
      } /* if(rel_prec) */
	  
    }
    else
    {    
      /* First time through this rhs. Find nev evecs */
      /* limited by the ldh evecs we can store in total */
      if (ldh-ncurEvals < nev)
	       nev = ldh - ncurEvals;
      nev_used = nev;
      
    }

    /* ------------------------------------------------------------ */
    /* Solve Ax = b with x initial guess                            */
    /* ------------------------------------------------------------ */

    wt1 = gettime();

    eigcg( N, LDN, x, b, &normb, eps_sq_used, restart_eps_sq, rel_prec, maxit_remain, 
	     &numIts, &cur_res, &flag, solver_field, f, 
	     nev_used, v_max, V, esize, ework);
     
    //if(g_proc_id == g_stdio_proc) 
        //printf("eigcg flag= %d \n",flag); 
      
    wt2 = gettime();

    wE = wE + wt2-wt1;
    
    /* if flag == 3 update the remain max number of iterations */
    maxit_remain = maxit - numIts;
    
  }
  /* end while (flag ==-1 || flag == 3)               */
  /* ------------------------------------------------ */

  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(solver_field[0],x); /* solver_field[0]= A*x */
  diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */	
  normsq=square_norm(solver_field[1],N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "For this rhs:\n");
    fprintf(stdout, "Total initCG Wallclock : %-f\n", wI);
    fprintf(stdout, "Total eigpcg Wallclock : %-f\n", wE);
    fprintf(stdout, "Iterations: %-d\n", numIts); 
    fprintf(stdout, "Residual: %e, Actual Resid of LinSys  : %e\n", cur_res,normsq);
    if (flag != 0) {
      fprintf(stderr, "Error: eigcg returned with nonzero exit status\n");
      return flag;
      fflush(stderr);
    }
    fflush(stdout);
  }
  /* ------------------------------------------------------------------- */
  /* ------------------------------------------------------------------- */
  /* Update the evecs and the factorization of evecs'*A*evecs            */
  /* ------------------------------------------------------------------- */
  if (nev > 0) 
  {

    wt1 = gettime();

    /* Append new Ritz vectors to the basis and orthogonalize them to evecs */
    for(i=0; i<nev_used; i++)
      assign(evecs[i+ncurEvals],&V[i*LDN],N);
    
    nAdded = ortho_new_vectors(evecs,N,ncurEvals,nev_used,machEps);

    /* expand H */
    for(j=ncurEvals; j< (ncurEvals+nAdded); j++)
    {
      f(solver_field[0],evecs[j]);
      
      for(i=0; i<=j; i++)
      {
	       H[i+j*ldh] = scalar_prod(evecs[i],solver_field[0],N,parallel);
	       H[j+i*ldh]=  conj(H[i+j*ldh]);
	       //H[j+i*ldh].re =  H[i+j*ldh].re;
	       //H[j+i*ldh].im = -H[i+j*ldh].im;
      }
      
    }
    
    /* update the number of vectors in the basis */
    ncurEvals = ncurEvals + nAdded;

    /* ---------- */
    /* Reporting  */
    /* ---------- */

    wt2 = gettime();

    
    if(g_proc_id == g_stdio_proc && g_debug_level > 0)
    {
      fprintf(stdout,"ncurRHS %d\n",ncurRHS);
      fprintf(stdout,"ncurEvals %d \n",ncurEvals);
      fprintf(stdout,"Update\n");
      fprintf(stdout,"Added %d vecs\n",nAdded);
      fprintf(stdout,"U Wallclock : %-f\n", wt2-wt1);
      fprintf(stdout,"Note: Update Wall time doesn't include time for computing eigenvalues and their residuals.\n"); 
      fflush(stdout);     
    }
    
    if(g_debug_level > 3)  /*compute eigenvalues and their residuals if requested*/
    {
      /* copy H into HU */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE);

      /* compute eigenvalues and eigenvectors of HU (using V and spinor fields as tmp work spaces)*/
      _FT(zheev)(&cV, &cU, &ncurEvals, HU, &ldh, evals, work, &lwork, rwork, &info,1,1);

      if(info != 0)
      {
	if(g_proc_id == g_stdio_proc) 
	{
	  fprintf(stderr,"Error in ZHEEV:, info =  %d\n",info); 
          fflush(stderr);
	}
	exit(1);
      }

      /* compute residuals and print out results */
      for(i=0; i<ncurEvals; i++)
      {
	    tmpi=12*N;
            tmpsize=12*LDN;

	    
            _FT(zgemv)(&cN,&tmpi,&ncurEvals,&tpone,(_Complex double  *)evecs[0],&tmpsize,
			                 &HU[i*ldh], &ONE,&tzero,(_Complex double  *) solver_field[0],&ONE,1);

            normsq=square_norm(solver_field[0],N,parallel);
            
            f(solver_field[1],solver_field[0]);

            tempz = scalar_prod(solver_field[0],solver_field[1],N,parallel);

            evals[i] = creal(tempz)/normsq;

            mul_r(solver_field[2],evals[i],solver_field[0],N);

            diff(solver_field[3],solver_field[1],solver_field[2], N);
	    
	    tmpd2= square_norm(solver_field[3],N,parallel);

            tmpd= sqrt(tmpd2/normsq);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"RR Eval[%d]: %22.15E rnorm: %22.15E\n", i+1, evals[i], tmpd); fflush(stdout);}
	
      } 
       
    }/*if(plvl >= 2)*/
  } /* if(nev>0) */

  /*--------------------------------------*/
  /*free memory that is no longer needed  */
  /* and reset ncurRHS and ncurEvals      */
  /*--------------------------------------*/

  if(ncurRHS == nrhs) /*this was the last system to be solved */
  {
     ncurRHS=0;
     ncurEvals=0;
     finalize_solver(solver_field,nrsf);
  }

  if( (ncurRHS == nrhs) && (nev >0) )/*this was the last system to be solved and there were allocated memory for eigenvector computation*/
  {
     finalize_solver(evecs,ldh);
     #if (defined SSE || defined SSE2 || defined SSE3)
     free(_v);
     free(_h);
     free(_hu);
     free(_ework);
     free(_initwork);
     free(_IPIV);
     free(_evals);
     free(_rwork);
     free(_work);
     #else
     free(V);
     free(H);
     free(HU);
     free(ework);
     free(initwork);
     free(IPIV);
     free(evals);
     free(rwork);
     free(work);
     #endif
  }

  return numIts;
}
Example #16
0
int update() //Basic HMC update step
{
  double squnrm;
  int i, acc;
  double exphdiff;
  
  /* the new impulses and the 'generator' of the arbitrary pseudofield */
  /* calculate the hamiltonian of this state: new impulses + action */
  /* g_X is ab-used a bit - here it is \xi = (gamma5 D)^{-1} \phi */
  
  ham_old = s_g_old;
  for(i=0; i<GRIDPOINTS; i++) {
    gp1[i] = gauss();
    gp2[i] = gauss();
    ham_old += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]);
  }
  
  /* Now create the field and calculate its contributions to the action (end of the 'misuse') */
  /* squnrm is the fermion part of the action : */
  /*   S = R^dagger * R  =  g_fermion^dag * D^{-1 dag} * D^{-1} * g_fermion = g_fermion Q^-1 g_fermion */

  /* PF1 det(1/(Q^2 + mu^2)) */
  for(i=0; i<GRIDPOINTS; i++) {
    g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R
    g_X[i].s2 = (gauss() + I*gauss())/sqrt(2);
  }
  squnrm = square_norm(g_X);
  
  // step iv): g_fermion = \phi = K^dag * g_X = K^dag * \xi
  gam5D_wilson(g_fermion, g_X);
  assign_diff_mul(g_fermion, g_X, 0.+I*sqrt(g_musqr));
  ham_old += squnrm;

  /* PF2 det((Q^2 + mu^2)/Q^2) */
  if(no_timescales > 2) {
    for(i=0; i<GRIDPOINTS; i++) {
      g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R
      g_X[i].s2 = (gauss() + I*gauss())/sqrt(2);
    }
    squnrm = square_norm(g_X);

    cg(g_fermion2, g_X, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson);    
    gam5D_wilson(g_gam5DX, g_fermion2);
    assign_add_mul(g_gam5DX, g_fermion2, 0.+I*sqrt(g_musqr));
    gam5D_wilson(g_fermion2, g_gam5DX);
    ham_old += squnrm;
  }
  // Add the part for the fermion fields

  // Do the molecular dynamic chain
  /* the simple LF scheme */

  /* the second order minimal norm multi-timescale integrator*/
  /* MN2_integrator(g_steps, 2, g_steps*g_stepsize, 0.2); */

  /* This is the recursive implementation */
  /* in can be found in rec_lf_integrator.c|h */
  if (no_timescales == 1)
    leapfrog(n_steps[0], tau/n_steps[0]);
  else
    integrate_leap_frog(tau/n_steps[no_timescales-1], no_timescales-1, no_timescales, n_steps, 1, up_momenta);
  
  // Calculate the new action and hamiltonian
  ham = 0;
  s_g = 0;
  for (i=0; i<GRIDPOINTS; i++) {
    s_g += S_G(i);
    ham += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]);
  }
  /* Sum_ij [(g_fermion^*)_i (Q^-1)_ij (g_fermion)_j]  =  Sum_ij [(g_fermion^*)_i (g_X)_i] */
  ham += s_g;
  // add in the part for the fermion fields.
  cg(g_X, g_fermion, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson);
  ham += scalar_prod_r(g_fermion, g_X);
  
  if(no_timescales > 2) {
    cg(g_gam5DX, g_fermion2, ITER_MAX, DELTACG, &gam5D_SQR_wilson);
    gam5D_SQR_musqr_wilson(g_X, g_temp, g_gam5DX);
    ham += scalar_prod_r(g_fermion2, g_X);
  }

  exphdiff = exp(ham_old-ham);
  acc = accept(exphdiff);
 
  for(i=0; i<GRIDPOINTS; i++) {
    gauge1_old[i]=gauge1[i];
    gauge2_old[i]=gauge2[i];
  }
 
  s_g_old = s_g;
  return(acc);
}
Example #17
0
/* should have better condition than gam5D_SQR_wilson() */
void gam5D_SQR_musqr_wilson(spinor *out, spinor *temp, spinor *in) {
  gam5D_wilson(temp, in);
  gam5D_wilson(out, temp);
  assign_add_mul(out, in, g_musqr);
}