예제 #1
0
int main()
{
  Teuchos::SerialDenseMatrix<int, double> A(4,4);
  Teuchos::SerialDenseMatrix<int, double> B(4,1);
  Teuchos::SerialDenseMatrix<int, double> X(4,1);
  A.random();
  B.random();
  X.putScalar(0.0);
  
  int maxit=20;
  double tolerance=1e-6;

  gmres(A,X,B,maxit,tolerance);
 
  return 0;
}
int generalized_minimal_residual_method(int , char **)
{
    // Scalar type shared by the solver and the recorded statistics.
    typedef double value_type;
    // Statistics container: named series of values.
    typedef std::map<std::string, std::vector<value_type> > stats_type;

    const int system_size = 20;

    stats_type stats;
    GeneralizedMinimalResidualMethod<value_type,50> gmres(system_size);

    // The driver below is disabled; only the solver object above is
    // constructed when this function runs.
//     function_type<system_size> F;
//     value_type x[system_size];
//     value_type z[system_size]  = {0};
//     value_type f0[system_size] = {0};
//     std::fill(x,x+system_size,2.0);
//     F ( x,f0 );
// //     std::transform(f0, f0 + system_size,f0,std::negate<value_type>());
//     for(int i = 0; i < 2; ++i)
//         gmres ( F,f0,z,1e-16,&stats );
//
//     display_stats(stats);

    return 0;
}
예제 #3
0
/*
 * Interactive driver: reads the problem size N = 2^p * 3^q * 5^r - 1 and
 * the GMRES restart/iteration limits from stdin, sets up the problem via
 * initial_x/initial_A/initial_D/source, runs gmres() and reports the
 * error against exact_solution() together with the elapsed CPU time.
 *
 * Returns 0 on success and 1 on invalid input or allocation failure.
 */
int main()
{
	/* Largest N whose square still fits in a 32-bit signed int; N*N is
	 * formed in int arithmetic below (assumes int is at least 32 bits). */
	const int max_N = 46340;
	const int bases[3] = {2, 3, 5};
	int exps[3];
	int p, q, r, N, max_restart, max_iter, k, e;
	long size;
	size_t count;
	clock_t t1, t2;
	double *A, *D, *x, *b, *u, tol;

	printf("\n");
	printf(" Input N = 2^p * 3^q * 5^r - 1, (p, q, r) =  ");
	if (scanf("%d %d %d", &p, &q, &r) != 3 || p < 0 || q < 0 || r < 0) {
		fprintf(stderr, " ERROR: expected three non-negative integers p q r \n");
		return 1;
	}

	/* Build 2^p * 3^q * 5^r with exact integer arithmetic instead of
	 * pow(), whose floating-point result may truncate to the wrong int.
	 * The range check inside the loop also bounds the running product,
	 * so it cannot overflow even for huge exponents. */
	exps[0] = p;
	exps[1] = q;
	exps[2] = r;
	size = 1;
	for (k = 0; k < 3; k++) {
		for (e = 0; e < exps[k]; e++) {
			size *= bases[k];
			if (size - 1 > max_N) {
				fprintf(stderr, " ERROR: N = 2^p * 3^q * 5^r - 1 must not exceed %d \n", max_N);
				return 1;
			}
		}
	}
	N = (int)(size - 1);
	if (N < 1) {
		fprintf(stderr, " ERROR: N must be at least 1 (p = q = r = 0 gives N = 0) \n");
		return 1;
	}

	printf(" Please input max restart times max_restart = ");
	if (scanf("%d", &max_restart) != 1) {
		fprintf(stderr, " ERROR: max_restart must be an integer \n");
		return 1;
	}
	printf(" Please input max iteration times max_iter = ");
	if (scanf("%d", &max_iter) != 1) {
		fprintf(stderr, " ERROR: max_iter must be an integer \n");
		return 1;
	}
	printf("\n N = %d , max_restart = %d , max_iter = %d \n \n", N, max_restart, max_iter);

	/* Element count is formed in size_t so the byte count cannot
	 * overflow int; also guard the multiplication by sizeof(double). */
	count = (size_t)N * (size_t)N;
	if (count > (size_t)-1 / sizeof(double)) {
		fprintf(stderr, " ERROR: %d x %d doubles do not fit in memory \n", N, N);
		return 1;
	}
	A = (double *) malloc(count * sizeof(double));
	D = (double *) malloc(count * sizeof(double));
	x = (double *) malloc(count * sizeof(double));
	b = (double *) malloc(count * sizeof(double));
	u = (double *) malloc(count * sizeof(double));
	if (A == NULL || D == NULL || x == NULL || b == NULL || u == NULL) {
		fprintf(stderr, " ERROR: could not allocate five arrays of %d x %d doubles \n", N, N);
		/* free(NULL) is a no-op, so releasing all five is safe. */
		free(A);
		free(D);
		free(x);
		free(b);
		free(u);
		return 1;
	}

	initial_x(x, N);
	initial_A(A, N);
	initial_D(D, N);
	source(b, N);

	tol = 1.0e-6;
	t1 = clock();
	gmres(A, D, x, b, N, max_restart, max_iter, tol);
	t2 = clock();
	exact_solution(u, N);
	//printf(" u[%d][%d] = %f \n", N/2, N/2, u[N*N/2+N/2]);
	//printf(" x[%d][%d] = %f \n", N/2, N/2, x[N*N/2+N/2]);

	printf(" error = %e \n", error(x, u, N*N));
	printf(" times = %f \n \n", 1.0*(t2-t1)/CLOCKS_PER_SEC);

	free(A);
	free(D);
	free(x);
	free(b);
	free(u);

	return 0;
}
/*
 * Allocate a zero-initialised vector of "count" doubles for the routine
 * named by "fctName". On allocation failure an error naming "vecName" is
 * printed to stderr and the program exits with status 1, matching the
 * way solveFull already treats an unknown preconditioner type.
 */
static double *solveFullAllocVector(int count, const char *fctName,
                                    const char *vecName)
{
  double *vec = (double *) calloc(count, sizeof(double));
  /* calloc may legitimately return NULL for a zero-length request, so
   * only a non-empty request that yields NULL counts as a failure.    */
  if (vec == NULL && count != 0) {
    fprintf(stderr, "%s: ERROR. Could not allocate %d doubles for %s\n"
            "    Exiting\n",
            fctName, count, vecName);
    exit(1);
  }
  return vec;
}

/* 
* ====================================================================
* Solve the full system of equations including direct and grid parts.
*
* Note: This routine does not "need to know" about definitions of 
* grid, sparse matrices, elements etc. 
*
* This is still in TEST PHASE. I need to implement routines for dealing 
* with boundary conditions (in "elements.c"?) and setting up a 
* right-hand-side. These data should be passed on by the calling routine!
*
* Maybe later implement restarted GMRES to decrease memory consumption?
*
* Parameters:
*   directMat, interpMat, projectMat, grid
*                   Opaque handles; only stored in the bundle that is 
*                   handed to fullMatrixMultiply through gmres.
*   precondMat      Preconditioner; stored in the bundle and applied 
*                   here with spMatMultiply for "left" and "right".
*   precondType     "none", "left" or "right". Any other value prints 
*                   an error and exits with status 1.
*   sourceVector    Right-hand side (read, numColsDirect entries).
*   destVector      Receives the solution (numColsDirect entries).
*   matNum          Matrix number, forwarded to spMatMultiply and 
*                   stored in the bundle.
*   numColsDirect   Number of unknowns; also caps the number of GMRES 
*                   iterations.
*   numRowsDirect   Length of the "left" preconditioner work vector.
*
* The tolerance (1.0E-4) and the iteration limit (100) are fixed inside 
* the routine. Memory allocation failures terminate the program.
* ==================================================================== */
void solveFull(void *directMat, void *interpMat, void *projectMat,
	       void *precondMat, char *precondType, void *grid,
	       double *sourceVector, double *destVector,
	       int matNum, 
	       int numColsDirect, int numRowsDirect)
{
  char fctName[] = "solveFull";
  int testSolution = 0; /* Perform a final matrix-vector product to 
			 * confirm the solution accuracy.              */
  int i, iter = 0;      /* iter gets a defined value before any solve  */
  double norm1, norm2, tol2;
  struct fullSolveBundle bundleData[1];
  double err, *testVector=NULL, *solutionVectorTmp, *rightHandSideTmp;

  /* These should really be passed from the calling routine            */
  double tol = 1.0E-4, *solutionVector, *rightHandSide;
  int modifyTol=1; /* 1 if tol may be modified based on the RHS norm */
  int maxiter  = 100; /* Maximum number of iteration in the gmres      */
  /* Never do more iterations than the number of unknowns              */
  maxiter = MIN(maxiter, numColsDirect);

  /*
  solutionVector   = (double*) calloc(numColsDirect,sizeof(double));
  rightHandSide    = (double*) calloc(numColsDirect,sizeof(double));
  */
  /* Work directly on the caller's vectors.                            */
  solutionVector   = destVector;
  rightHandSide    = sourceVector;

/* JKW DUMMY = unit vector right-hand-side: */
/*
  fprintf(stderr,"%s: WARNING: Using dummy right-hand-side!\n",fctName);
  for (i=0; i<numColsDirect; i++)
    rightHandSide[i] = ((double) rand())/((double) RAND_MAX);
  for (i=0; i<numColsDirect; i++)
    rightHandSide[i] = 0.0;
  rightHandSide[0] = 1.0;
*/

  printf("%s: Solving system with preconditioner: %s\n",
	 fctName,precondType);

  /* Bundle data for the "matrix multiply routine". Use a structure 
   * for the bundle, so that it cannot be messed up easily.            */
  bundleData->directMat     = directMat;
  bundleData->interpMat     = interpMat;
  bundleData->projectMat    = projectMat;
  bundleData->precondMat    = precondMat;
  bundleData->precondType   = precondType;
  bundleData->numColsDirect = numColsDirect;
  bundleData->numRowsDirect = numRowsDirect;
  bundleData->grid          = grid;
  /* Start with no work vectors, so the bundle never carries 
   * indeterminate pointers for the types that do not allocate them.   */
  bundleData->workSrc       = NULL;
  bundleData->workEval      = NULL;

  /* Presently, only work on the first matrix. 
   * This *could* be changed at a later point. */
  bundleData->matNum = matNum;

  /* Allocate working vector for the "matrix multiply routine" */
  /*  Work vectors for eval values for direct and grid parts           */
  if (strcmp(precondType,"right")==0){
    /* Make an extra work vector to store source values                */
    bundleData->workSrc =
      solveFullAllocVector(numColsDirect, fctName, "workSrc");
  }
  else if (strcmp(precondType,"left")==0){
    /* Make an extra work vector to store eval values                  */
    bundleData->workEval =
      solveFullAllocVector(numRowsDirect, fctName, "workEval");
  }
  else if (strcmp(precondType,"none")!=0){
    /* This is everything else, which is unknown = bad! 
     * Print error and exit */
    fprintf(stderr,"%s: ERROR. Unknown preconditioner type\n"
	    "    Please use \"none\", \"left\" or \"right\"\n"
	    "    Exiting\n",
	    fctName);
    exit(1);
  }

  /* === Call iterative solver === */
  /* Possibly repeat the call to the iterative solver.
   * Note that the memory consumption is O(maxiter^2*N) and the 
   * CPU consumption is O(maxgmres*maxiter^2*N), so it may pay out 
   * to use restarted gmres.                                           */
  if (strcmp(precondType,"none")==0){
    /*  NO precond, solve:     Ax  = b */
    iter = gmres(solutionVector, rightHandSide,
		 numColsDirect, tol, maxiter,
		 fullMatrixMultiply,
		 (void*) bundleData);
  }
  else if (strcmp(precondType,"right")==0){
    /*  RIGHT precond, solve:     APy  = b */
    /* Modify stop criterion based on the 2-norm of the 
     * right-hand-side vector                                         */
    for (i=0,norm1=0.0;i<numColsDirect;i++)
      norm1 += rightHandSide[i]*rightHandSide[i];
    norm1 = sqrt(norm1);
    tol2 = tol;
    if (norm1>0.0 && modifyTol){
      fprintf(stdout,"%s: Scaling tolerance with ||b|| = %g\n",
	      fctName,norm1);
      tol2 *= norm1;
      fprintf(stdout,
	      "%s: Aiming for tolerance %g, rather than the specified %g\n",
	      fctName,tol2,tol);
    }
    solutionVectorTmp =
      solveFullAllocVector(numColsDirect, fctName, "solutionVectorTmp");
    iter = gmres(solutionVectorTmp, rightHandSide,
		 numColsDirect, tol2, maxiter,
		 fullMatrixMultiply,
		 (void*) bundleData);
    /*  Then calculate x = Py. */
    spMatMultiply(precondMat, solutionVectorTmp, 
		  solutionVector, matNum);
    FREE(solutionVectorTmp); 
  }
  else if (strcmp(precondType,"left")==0){
    /* LEFT preconditioner - calculate y := Pb. */
    rightHandSideTmp =
      solveFullAllocVector(numColsDirect, fctName, "rightHandSideTmp");
    spMatMultiply(precondMat,rightHandSide,rightHandSideTmp,matNum);
    /* Note that rightHandSideTmp and rightHandSide do not have the 
     * same norm. In fact they may differ considerably, so make sure 
     * this is taken into account when selecting the accuracy of the 
     * solution. At the moment I'm slightly at a loss on how to do 
     * this accurately. For now, the tolerance will be scaled by 
     * ||Pb||/||b||, where P is the preconditioner and b is the right 
     * hand side (Pb is the new right hand side). 
     * Issue a warning statement about this                        */
    fprintf(stderr,"%s: WARNING: Scaling tolerance with ||Pb||/||b||!\n",
	    fctName);
    for (i=0,norm1=0.0,norm2=0.0;i<numColsDirect;i++){
      norm1 += rightHandSide[i]*rightHandSide[i];
      norm2 += rightHandSideTmp[i]*rightHandSideTmp[i];
    }
    norm1 = sqrt(norm1);
    norm2 = sqrt(norm2);
    if(norm1>0.0 && modifyTol){
      tol2 = tol*norm2/norm1; 
      fprintf(stderr,
	      "%s: Aiming for tolerance %g, rather than the specified %g!\n",
	      fctName,tol2,tol);
    }
    else tol2 = tol;
    /* Now solve PAx  = y */
    iter = gmres(solutionVector, rightHandSideTmp,
		 numColsDirect, tol2, maxiter,
		 fullMatrixMultiply,
		 (void*) bundleData);
    FREE(rightHandSideTmp);
  }
  printf("%s: GMRES used %d iterations\n",fctName,iter);

  /* Test the solution making a matrix-vector product 
   * (no preconditioning) A*x and comparing with the right-hand-side   */
  if (testSolution) {
    bundleData->precondType   = "none";
    testVector = solveFullAllocVector(numColsDirect, fctName, "testVector");
    fullMatrixMultiply(solutionVector,testVector,numColsDirect,
		       (void*)bundleData);
    for (i=0, err=0.0; i<numColsDirect; i++)
      err += (testVector[i]-rightHandSide[i])
	*(testVector[i]-rightHandSide[i]);
    err=sqrt(err);
    printf(" Estimated ||err||:  %g\n",err);
  }

  /* TEST SECTION (disabled). The dumps below write through testVector, 
   * which is only allocated when testSolution is set.                 */
  if (0){
    if (0){
      for (i=0; i<numColsDirect; i++){
	printf("%g %g\n",solutionVector[i],rightHandSide[i]);
      }
    }
    /*dumpSparseMatrixTranspose(precondMat,0);*/
    if (0){
      int j;
      for (j=0;j<numColsDirect;j++){
	for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0;
	testVector[j]=1;
	fullMatrixMultiply(testVector,solutionVector,numColsDirect,
			   (void*)bundleData);
	printf("Column %d: \n",j);
	for (i=0; i<numColsDirect; i++)
	  printf(" %12.6e",solutionVector[i]);
	printf("\n");
      }
    }
    if (0){/* Dump transpose matrix to file: */
      int j;
      FILE *dumpFile;
      dumpFile = fopen("mat.out","w");
      printf("Writing to mat.out\n");

      for (j=0;j<numColsDirect;j++){
	for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0;
	testVector[j]=1;
	fullMatrixMultiply(testVector,solutionVector,numColsDirect,
			   (void*)bundleData);
	for (i=0; i<numColsDirect; i++)
	  fprintf(dumpFile," %15.8e",solutionVector[i]);
	fprintf(dumpFile,"\n");
      }
      fclose(dumpFile);
    }
    if (0){ /* Calculate and write A*e_1: */
      int j;
      FILE *dumpFile;
      dumpFile = fopen("a_times_e1.out","w");

      j=0;
      for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0;
      testVector[j]=1;
      fullMatrixMultiply(testVector,solutionVector,numColsDirect,
			 (void*)bundleData);
      printf("Writing to %s\n","a_times_e1.out");
      for (i=0; i<numColsDirect; i++)
	fprintf(dumpFile," %12.6e\n",solutionVector[i]);
      /* Close the dump file like the transpose dump above does.       */
      fclose(dumpFile);
    }
  }
  /* Test a preconditioner. For a small system P*A=I, so PA*b=b 
  for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0;
  bundleData->precondType   = "none";
  testVector = (double*) calloc(numColsDirect,sizeof(double));
  fullMatrixMultiply(rightHandSide,testVector,numColsDirect,
		     (void*)bundleData);
  spMatMultiply(precondMat,testVector,solutionVector,matNum);
  for (i=0; i<numColsDirect; i++)
    printf(" %16.8e %16.8e %16.8e \n",rightHandSide[i],solutionVector[i],testVector[i]);
  */

  /* Free working vector and other stuff used. The decision is based on 
   * the precondType argument, since the bundle's copy may have been 
   * switched to "none" by the solution test above.                    */
  IFFREE(testVector);
  /*FREE(solutionVector);
    FREE(rightHandSide);*/
  if (strcmp(precondType,"right")==0){
    FREE(bundleData->workSrc);
  }
  else if (strcmp(precondType,"left")==0){
    FREE(bundleData->workEval);
  }
      
  return; /* Should probably return (a pointer to) the solution vector? */
} /* End of routine solveFull */
예제 #5
0
파일: main.cpp 프로젝트: phelrine/NBTools
//=============================================================================
int main(int argc, char** argv)
{
  // ---- Stream formatting: scientific notation with an explicit sign ----
  std::cerr.precision(3);
  std::cerr.setf(std::ios::scientific, std::ios::floatfield);
  std::cerr.setf(std::ios::showpos);
  std::cout.precision(9);
  std::cout.setf(std::ios::scientific, std::ios::floatfield);
  std::cout.setf(std::ios::showpos);

  // ---- Build the system A x = b ----
  CPPL::dgsmatrix A;
  CPPL::dcovector b;
  // Set to true to load the system from files instead of generating it.
  //const bool file =true;
  const bool file = false;

  if( !file ){
    // Generate a random sparse system and save it to A.dge / b.dco.
    std::cerr << "# making random matrix" << std::endl;
    const int dim = 1000;
    const double rand_max = double(RAND_MAX);
    const double diag_shift = 1e-2*double(dim);
    A.resize(dim, dim);
    b.resize(dim);
    srand(time(NULL));
    for(int row=0; row<dim; ++row){
      for(int col=0; col<dim; ++col){
        // Roughly one entry in five gets a value in [-1, 1].
        if( rand()%5 != 0 ){
          continue;
        }
        A(row,col) = (double(rand())/rand_max)*2.0 - 1.0;
      }
      A(row,row) += diag_shift;//generalization
      b(row) = (double(rand())/rand_max)*1. - 0.5;
    }
    A.write("A.dge");
    b.write("b.dco");
  }
  else{
    // Read a stored system.
    A.read("A8.dge");
    b.read("b8.dco");
    //A.read("A10.dge");  b.read("b10.dco");
    //A.read("A44.dge"); b.read("b44.dco");
  }

  // ---- Reference solution from the dense direct solver ----
  std::cerr << "# making solution with dgesv" << std::endl;
  CPPL::dgematrix A2(A.to_dgematrix());
  CPPL::dcovector b2(b);
  A2.dgesv(b2);
  b2.write("ans.dco");

  // ---- Iterative solution ----
  // Initial guess: zero vector of the same length as b.
  CPPL::dcovector x(b.l);
  //x.read("x_ini8.dco");
  x.zero();

  // Tolerance scaled by the value damax(b) returns.
  double eps = fabs(damax(b))*1e-6;
  std::cerr << "eps=" << eps << std::endl;

  // A non-zero result from gmres is treated as failure; x is written
  // to x.dco either way.
  if( gmres(A, b, x, eps) ){
    std::cerr << "failed." << std::endl;
    x.write("x.dco");
    exit(1);
  }
  x.write("x.dco");

  // Compare against the direct solution held in b2.
  std::cerr << "fabs(damax(err))=" << fabs(damax(b2-x)) << std::endl;

  return 0;
}
예제 #6
0
파일: jdher.c 프로젝트: Finkenrath/tmLQCD
void jdher(int n, int lda, double tau, double tol, 
	   int kmax, int jmax, int jmin, int itmax,
	   int blksize, int blkwise, 
	   int V0dim, _Complex double *V0, 
	   int solver_flag, 
	   int linitmax, double eps_tr, double toldecay,
	   int verbosity,
	   int *k_conv, _Complex double *Q, double *lambda, int *it,
	   int maxmin, int shift_mode,
	   matrix_mult A_psi) {

  /****************************************************************************
   *                                                                          *
   * Local variables                                                          *
   *                                                                          *
   ****************************************************************************/
  
  /* constants */

  /* allocatables: 
   * initialize with NULL, so we can free even unallocated ptrs */
  double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;

  _Complex double *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL,
    *Res_ = NULL, *Res,
    *eigwork = NULL, *temp1_ = NULL, *temp1;

  int *idx1 = NULL, *idx2 = NULL, 
    *convind = NULL, *keepind = NULL, *solvestep = NULL, 
    *actcorrits = NULL;

  /* non-allocated ptrs */
  _Complex double *q, *v, *u, *r = NULL;  
/*   _Complex double *matdummy, *vecdummy; */

  /* scalar vars */
  double theta, alpha, it_tol;

  int i, k, j, actblksize, eigworklen, found, conv, keep, n2;
  int act, cnt, idummy, info, CntCorrIts=0, endflag=0;
  int N = n*sizeof(_Complex double)/sizeof(spinor);

  /* variables for random number generator */
  int IDIST = 1;
  int ISEED[4] = {2, 3, 5, 7};
  ISEED[0] = g_proc_id+2;

  /****************************************************************************
   *                                                                          *
   * Execution starts here...                                                 *
   *                                                                          *
   ****************************************************************************/


  /* print info header */
  if ((verbosity > 2) && (g_proc_id == 0)){
    printf("Jacobi-Davidson method for hermitian Matrices\n");
    printf("Solving  A*x = lambda*x \n\n");
    printf("  N=      %10d  ITMAX=%4d\n", n, itmax);
    printf("  KMAX=%3d  JMIN=%3d  JMAX=%3d  V0DIM=%3d\n", 
	   kmax, jmin, jmax, V0dim);
    printf("  BLKSIZE=        %2d  BLKWISE=      %5s\n", 
	   blksize, blkwise ? "TRUE" : "FALSE");
    printf("  TOL=  %11.4e TAU=  %11.4e\n", 
	   tol, tau);
    printf("  LINITMAX=    %5d  EPS_TR=  %10.3e  TOLDECAY=%9.2e\n", 
	   linitmax, eps_tr, toldecay);
    printf("\n Computing %s eigenvalues\n",
	   maxmin ? "maximal" : "minimal");
    printf("\n");
    fflush( stdout );
  }

  /* validate input parameters */
  if(tol <= 0) jderrorhandler(401,"");
  if(kmax <= 0 || kmax > n) jderrorhandler(402,"");
  if(jmax <= 0 || jmax > n) jderrorhandler(403,"");
  if(jmin <= 0 || jmin > jmax) jderrorhandler(404,"");
  if(itmax < 0) jderrorhandler(405,"");
  if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,"");
  if(blksize <= 0 || blksize > kmax) jderrorhandler(406,"");
  if(blkwise < 0 || blkwise > 1) jderrorhandler(407,"");
  if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,"");
  if(linitmax < 0) jderrorhandler(409,"");
  if(eps_tr < 0.) jderrorhandler(500,"");
  if(toldecay <= 1.0) jderrorhandler(501,"");
  
  CONE = 1.;
  CZERO = 0.;
  CMONE = -1.;

  /* Get hardware-dependent values:
   * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
   * block size... */
  eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;

  /* Allocating memory for matrices & vectors */ 

  if((void*)(V_ = (_Complex double *)malloc((lda * jmax + 4) * sizeof(_Complex double))) == NULL) {
    errno = 0;
    jderrorhandler(300,"V in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  V = (_Complex double*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  V = V_;
#endif
  if((void*)(U = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"U in jdher");
  }
  if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) {
    jderrorhandler(300,"s in jdher");
  }
  if((void*)(Res_ = (_Complex double *)malloc((lda * blksize+4) * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"Res in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  Res = (_Complex double*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  Res = Res_;
#endif
  if((void*)(resnrm = (double *)malloc(blksize * sizeof(double))) == NULL) {
    jderrorhandler(300,"resnrm in jdher");
  }
  if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) {
    jderrorhandler(300,"resnrm_old in jdher");
  }
  if((void*)(M = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"M in jdher");
  }
  if((void*)(Vtmp = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"Vtmp in jdher");
  }
  if((void*)(p_work = (_Complex double *)malloc(lda * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"p_work in jdher");
  }

  /* ... */
  if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) {
    jderrorhandler(300,"idx1 in jdher");
  }
  if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) {
    jderrorhandler(300,"idx2 in jdher");
  }

  /* Indices for (non-)converged approximations */
  if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"convind in jdher");
  }
  if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"keepind in jdher");
  }
  if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"solvestep in jdher");
  }
  if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"actcorrits in jdher");
  }

  if((void*)(eigwork = (_Complex double *)malloc(eigworklen * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"eigwork in jdher");
  }
  if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) {
    jderrorhandler(300,"rwork in jdher");
  }
  if((void*)(temp1_ = (_Complex double *)malloc((lda+4) * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"temp1 in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  temp1 = (_Complex double*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  temp1 = temp1_;
#endif
  if((void*)(dtemp = (double *)malloc(lda * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"dtemp in jdher");
  }

  /* Set variables for Projection routines */
  n2 = 2*n;
  p_n = n;
  p_n2 = n2;
  p_Q = Q;
  p_A_psi = A_psi;
  p_lda = lda;

  /**************************************************************************
   *                                                                        *
   * Generate initial search subspace V. Vectors are taken from V0 and if   *
   * necessary randomly generated.                                          *
   *                                                                        *
   **************************************************************************/

  /* copy V0 to V */
  _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1);
  j = V0dim;
  /* if V0dim < blksize: generate additional random vectors */
  if (V0dim < blksize) {
    idummy = (blksize - V0dim)*n; /* nof random numbers */
    _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda);
    j = blksize;
  }
  for (cnt = 0; cnt < j; cnt ++) {
    ModifiedGS(V + cnt*lda, n, cnt, V, lda);
    alpha = sqrt(square_norm((spinor*)(V+cnt*lda), N, 1));
    alpha = 1.0 / alpha;
    _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
  }

  /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle
     is computed. */
  for (cnt = 0; cnt < j; cnt++){
    A_psi((spinor*) temp1, (spinor*)(V+cnt*lda));
    idummy = cnt+1;
    for(i = 0; i < idummy; i++){
      M[cnt*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1);
    }
  }

  /* Other initializations */
  k = 0; (*it) = 0; 
  if((*k_conv) > 0) {
    k = *k_conv;
  }

  actblksize = blksize; 
  for(act = 0; act < blksize; act ++){
    solvestep[act] = 1;
  }


  /****************************************************************************
   *                                                                          *
   * Main JD-iteration loop                                                   *
   *                                                                          *
   ****************************************************************************/

  while((*it) < itmax) {

    /****************************************************************************
     *                                                                          *
     * Solving the projected eigenproblem                                       *
     *                                                                          *
     * M*u = V^dagger*A*V*u = s*u                                                     *
     * M is hermitian, only the upper triangle is stored                        *
     *                                                                          *
     ****************************************************************************/
    _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1);
    _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1); 

    if (info != 0) {
      printf("error solving the projected eigenproblem.");
      printf(" zheev: info = %d\n", info);
    }
    if(info != 0) jderrorhandler(502,"proble in zheev");
  

    /* Reverse order of eigenvalues if maximal value is needed */
    if(maxmin == 1){
      sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); 
    }
    else{
      sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); 
    }
    /****************************************************************************
     *                                                                          *
     * Convergence/Restart Check                                                *
     *                                                                          *
     * In case of convergence, strip off a whole block or just the converged    *
     * ones and put 'em into Q.  Update the matrices Q, V, U, s                 *
     *                                                                          *
     * In case of a restart update the V, U and M matrices and recompute the    *
     * Eigenvectors                                                             *
     *                                                                          *
     ****************************************************************************/

    found = 1;
    while(found) {

      /* conv/keep = Number of converged/non-converged Approximations */
      conv = 0; keep = 0;

      for(act=0; act < actblksize; act++){

	/* Setting pointers for single vectors */
	q = Q + (act+k)*lda; 
	u = U + act*jmax; 
	r = Res + act*lda; 
	
	/* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */
	theta = s[act];
	_FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1);

	/* Compute the residual */
	A_psi((spinor*) r, (spinor*) q); 
	theta = -theta;
	_FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);

	/* Compute norm of the residual and update arrays convind/keepind*/
	resnrm_old[act] = resnrm[act];
	resnrm[act] = sqrt(square_norm((spinor*) r, N, 1));
	if (resnrm[act] < tol){
	  convind[conv] = act; 
	  conv = conv + 1; 
	}
	else{
	  keepind[keep] = act; 
	  keep = keep + 1; 
	}
	
      }  /* for(act = 0; act < actblksize; act ++) */

      /* Check whether the blkwise-mode is chosen and ALL the
	 approximations converged, or whether the strip-off mode is
	 active and SOME of the approximations converged */

      found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) 
	&& (j > actblksize || k == kmax - actblksize);
      
      /***************************************************************************
	*                                                                        *
	* Convergence Case                                                       *
	*                                                                        *
	* In case of convergence, strip off a whole block or just the converged  *
	* ones and put 'em into Q.  Update the matrices Q, V, U, s               *
	*                                                                        *
	**************************************************************************/

      if (found) {

	/* Store Eigenvalues */
	for(act = 0; act < conv; act++)
	  lambda[k+act] = s[convind[act]];
	 
	/* Re-use non approximated Ritz-Values */
	for(act = 0; act < keep; act++)
	  s[act] = s[keepind[act]];

	/* Shift the others in the right position */
	for(act = 0; act < (j-actblksize); act ++)
	  s[act+keep] = s[act+actblksize];

	/* Update V. Re-use the V-Vectors not looked at yet. */
	idummy = j - actblksize;
	for (act = 0; act < n; act = act + jmax) {
	  cnt = act + jmax > n ? n-act : jmax;
	  _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1);
	  _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, 
		     &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1);
	}

	/* Insert the not converged approximations as first columns in V */
	for(act = 0; act < keep; act++){
	  _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1);
	}

	/* Store Eigenvectors */
	for(act = 0; act < conv; act++){
	  _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1);
	}

	/* Update SearchSpaceSize j */
	j = j - conv;

	/* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ 
	_FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
	for (act = 0; act < j; act++)
	  M[act*jmax + act] = s[act];
	
	/* ... and U the Identity(jnew,jnew) */
	_FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);

	if(shift_mode == 1){
	  if(maxmin == 0){
	    for(act = 0; act < conv; act ++){
	      if (lambda[k+act] > tau){
		tau = lambda[k+act];
	      }
	    }
	  }
	  else{
	    for(act = 0; act < conv; act ++){
	      if (lambda[k+act] < tau){
		tau = lambda[k+act];
	      }
	    } 
	  }
	}
	 
	/* Update Converged-Eigenpair-counter and Pro_k */
	k = k + conv;

	/* Update the new blocksize */
	actblksize=min(blksize, kmax-k);

	/* Exit main iteration loop when kmax eigenpairs have been
           approximated */
	if (k == kmax){
	  endflag = 1;
	  break;
	}
	/* Counter for the linear-solver-accuracy */
	for(act = 0; act < keep; act++)
	  solvestep[act] = solvestep[keepind[act]];

	/* Now we expect to have the next eigenvalues */
	/* allready with some accuracy                */
	/* So we do not need to start from scratch... */
	for(act = keep; act < blksize; act++)
	  solvestep[act] = 1;

      } /* if(found) */
      if(endflag == 1){
	break;
      }
      /**************************************************************************
       *                                                                        *
       * Restart                                                                *
       *                                                                        *
       * The Eigenvector-Aproximations corresponding to the first jmin          *
       * Petrov-Vectors are kept.  if (j+actblksize > jmax) {                   *
       *                                                                        *
       **************************************************************************/
      if (j+actblksize > jmax) {

	idummy = j; j = jmin;

	for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */
	  cnt = act+jmax > n ? n-act : jmax;
	  _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1);
	  _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, 
		     &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1);
	}
	  
	_FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
	_FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
	for (act = 0; act < j; act++)
	  M[act*jmax + act] = s[act];
      }

    } /* while(found) */    

    if(endflag == 1){
      break;
    }

    /****************************************************************************
     *                                                                          *
     * Solving the correction equations                                         *
     *                                                                          *
     *                                                                          *
     ****************************************************************************/

    /* Solve actblksize times the correction equation ... */
    for (act = 0; act < actblksize; act ++) {      

      /* Setting start-value for vector v as zeros(n,1). Guarantees
         orthogonality */
      v = V + j*lda;
      for (cnt = 0; cnt < n; cnt ++){ 
	v[cnt] = 0.;
      }

      /* Adaptive accuracy and shift for the lin.solver. In case the
	 residual is big, we don't need a too precise solution for the
	 correction equation, since even in exact arithmetic the
	 solution wouldn't be too usefull for the Eigenproblem. */
      r = Res + act*lda;

      if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){
	p_theta = s[act];
      }
      else{
	p_theta = tau;
      }
      p_k = k + actblksize;

      /* if we are in blockwise mode, we do not want to */
      /* iterate solutions much more, if they have      */
      /* allready the desired precision                 */
      if(blkwise == 1 && resnrm[act] < tol) {
	it_tol = pow(toldecay, (double)(-5));
      }
      else {
	it_tol = pow(toldecay, (double)(-solvestep[act]));
      }
      solvestep[act] = solvestep[act] + 1;


      /* equation and project if necessary */
      ModifiedGS(r, n, k + actblksize, Q, lda);

      /* Solve the correction equation ...  */
      g_sloppy_precision = 1;
      if(solver_flag == GMRES){
/* 	info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, &Proj_A_psi, &Proj_A_psi); */
	info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, 0, 
		     n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi);
      }
      if(solver_flag == CGS){
	info = cgs_real((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
			n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi);
      }
      else if (solver_flag == BICGSTAB){
	info = bicgstab_complex((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
				n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi);
      }
      else if (solver_flag == CG){
	info = cg_her((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, 
		      n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi);
      }
      else{
 	info = gmres((spinor*) v, (spinor*) r, 10, linitmax, it_tol*it_tol, 0,
		     n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi); 
      }
      g_sloppy_precision = 0;

      /* Actualizing profiling data */
      if (info == -1){
	CntCorrIts += linitmax;
      }
      else{
	CntCorrIts += info;
      }
      actcorrits[act] = info;

      /* orthonormalize v to Q, cause the implicit
	 orthogonalization in the solvers may be too inaccurate. Then
	 apply "IteratedCGS" to prevent numerical breakdown 
         in order to orthogonalize v to V */

      ModifiedGS(v, n, k+actblksize, Q, lda);
      IteratedClassicalGS(v, &alpha, n, j, V, temp1, lda);

      alpha = 1.0 / alpha;
      _FT(dscal)(&n2, &alpha, (double*) v, &ONE);
      
      /* update interaction matrix M */
      A_psi((spinor*) temp1, (spinor*) v);
      idummy = j+1;
      for(i = 0; i < idummy; i++) {
	M[j*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1);
      }
      
      /* Increasing SearchSpaceSize j */
      j ++;
    }   /* for (act = 0;act < actblksize; act ++) */    

    /* Print information line */
    if(g_proc_id == 0) {
      print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, 
		   s, resnrm, actcorrits);
    }

    /* Increase iteration-counter for outer loop  */
    (*it) = (*it) + 1;

  } /* Main iteration loop */
  
  /******************************************************************
   *                                                                *
   * Eigensolutions converged or iteration limit reached            *
   *                                                                *
   * Print statistics. Free memory. Return.                         *
   *                                                                *
   ******************************************************************/

  (*k_conv) = k;
  if (g_proc_id == 0 && verbosity > 0) {
    printf("\nJDHER execution statistics\n\n");
    printf("IT_OUTER=%d   IT_INNER_TOT=%d   IT_INNER_AVG=%8.2f\n",
	   (*it), CntCorrIts, (double)CntCorrIts/(*it));
    printf("\nConverged eigensolutions in order of convergence:\n");
    printf("\n  I              LAMBDA(I)      RES(I)\n");
    printf("---------------------------------------\n");
  }    
  for (act = 0; act < *k_conv; act ++) {
    /* Compute the residual for solution act */
    q = Q + act*lda;
    theta = -lambda[act];
    A_psi((spinor*) r, (spinor*) q);
    _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
    alpha = sqrt(square_norm((spinor*) r, N, 1));
    if(g_proc_id == 0 && verbosity > 0) {
      printf("%3d %22.15e %12.5e\n", act+1, lambda[act],
	     alpha);
    }
  }
  if(g_proc_id == 0 && verbosity > 0) {
    printf("\n");
    fflush( stdout );
  }

  free(V_); free(Vtmp); free(U); 
  free(s); free(Res_); 
  free(resnrm); free(resnrm_old); 
  free(M); free(Z);
  free(eigwork); free(temp1_);
  free(dtemp); free(rwork);
  free(p_work);
  free(idx1); free(idx2); 
  free(convind); free(keepind); free(solvestep); free(actcorrits);
  
} /* jdher(.....) */
예제 #7
0
파일: RBEC.cpp 프로젝트: FengYueZJU/Study
void RBEC::stepForward()
{
    int i, j, k, l;
    int n_dof = fem_space.n_dof();
    int n_total_dof = 2 * n_dof;

    mat_RBEC.reinit(sp_RBEC);
    mat_rere.reinit(sp_rere);
    mat_reim.reinit(sp_reim);
    mat_imre.reinit(sp_imre);
    mat_imim.reinit(sp_imim);

    Vector<double> phi(n_total_dof);
    FEMFunction <double, DIM> phi_star(fem_space);
    Vector<double> rhs(n_total_dof);
    Potential V(gamma_x, gamma_y);

/// 准备一个遍历全部单元的迭代器.
    FEMSpace<double, DIM>::ElementIterator the_element = fem_space.beginElement();
    FEMSpace<double, DIM>::ElementIterator end_element = fem_space.endElement();

/// 循环遍历全部单元, 只是为了统计每一行的非零元个数.
    for (; the_element != end_element; ++the_element)
    {
/// 当前单元信息.
	double volume = the_element->templateElement().volume();
	const QuadratureInfo<DIM>& quad_info = the_element->findQuadratureInfo(6);
	std::vector<double> jacobian = the_element->local_to_global_jacobian(quad_info.quadraturePoint());
	int n_quadrature_point = quad_info.n_quadraturePoint();
	std::vector<AFEPack::Point<DIM> > q_point = the_element->local_to_global(quad_info.quadraturePoint());
/// 单元信息.
	std::vector<std::vector<std::vector<double> > > basis_gradient = the_element->basis_function_gradient(q_point);
	std::vector<std::vector<double> >  basis_value = the_element->basis_function_value(q_point);
	std::vector<double> phi_re_value = phi_re.value(q_point, *the_element);
	std::vector<double> phi_im_value = phi_im.value(q_point, *the_element);
	const std::vector<int>& element_dof = the_element->dof();
	int n_element_dof = the_element->n_dof();
/// 实际拼装.
	for (l = 0; l < n_quadrature_point; ++l)
	{
	    double Jxw = quad_info.weight(l) * jacobian[l] * volume;
	    for (j = 0; j < n_element_dof; ++j)
	    {
		for (k = 0; k < n_element_dof; ++k)
		{

		    double cont = Jxw * ((1 / dt) * basis_value[j][l] * basis_value[k][l]
					 + 0.5 * innerProduct(basis_gradient[j][l], basis_gradient[k][l])
					 + V.value(q_point[l]) * basis_value[j][l] * basis_value[k][l]
					 + beta * (phi_re_value[l] * phi_re_value[l]  + phi_im_value[l] * phi_im_value[l]) * basis_value[j][l] * basis_value[k][l]);

		    mat_RBEC.add(element_dof[j], element_dof[k], cont);
		    mat_RBEC.add(element_dof[j] + n_dof, element_dof[k] + n_dof, cont);
		}
		rhs(element_dof[j]) += Jxw * phi_re_value[l] * basis_value[j][l] / dt;
		rhs(element_dof[j] + n_dof) += Jxw * phi_im_value[l] * basis_value[j][l] / dt;
	    }
	}
    }

    FEMFunction<double, DIM> _phi_re(phi_re);
    FEMFunction<double, DIM> _phi_im(phi_im);

    boundaryValue(phi, rhs, mat_RBEC);

//    AMGSolver solver(mat_RBEC);
//    solver.solve(phi, rhs);

    dealii::SolverControl solver_control(4000, 1e-15);
    SolverGMRES<Vector<double> >::AdditionalData para(500, false, true);
    SolverGMRES<Vector<double> > gmres(solver_control, para);
    gmres.solve(mat_RBEC, phi, rhs, PreconditionIdentity());

    for (int i = 0; i < n_dof; ++i)
    {
         phi_re(i) = phi(i);
         phi_im(i) = phi(n_dof + i);
    }
	
    for (int i = 0; i < n_dof; ++i)
        phi_star(i) = sqrt(phi_re(i) * phi_re(i) + phi_im(i) * phi_im(i));

    double L2Phi = Functional::L2Norm(phi_re, 6);

    std::cout << "L2 norm = " << L2Phi << std::endl;
       
    for (int i = 0; i < n_dof; ++i)
    {
	phi_re(i) /= L2Phi;
	phi_im(i) /= L2Phi;
    }

    double e = energy(phi_re, phi_im, 6);
    std::cout << "Energy = " << e << std::endl;

    
    t += dt;
};
예제 #8
0
파일: gmres_dr.c 프로젝트: palao/tmLQCD
/*
 * Fallback gmres_dr for builds without LAPACK: it simply forwards to the
 * plain gmres solver and returns whatever gmres returns.
 *
 * nr_ev is accepted for interface compatibility but is not used here.
 * The literal 1 is handed to gmres as its eighth argument (presumably the
 * "parallel" flag -- confirm against the gmres prototype).
 */
int gmres_dr(spinor * const P,spinor * const Q, 
	  const int m, const int nr_ev, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, matrix_mult f){
  int status;

  (void) nr_ev; /* intentionally ignored by this fallback */

  status = gmres(P, Q, m, max_restarts, eps_sq, rel_prec, N, 1, f);
  return status;
}