int main() { Teuchos::SerialDenseMatrix<int, double> A(4,4); Teuchos::SerialDenseMatrix<int, double> B(4,1); Teuchos::SerialDenseMatrix<int, double> X(4,1); A.random(); B.random(); X.putScalar(0.0); int maxit=20; double tolerance=1e-6; gmres(A,X,B,maxit,tolerance); return 0; }
int generalized_minimal_residual_method(int , char **) { typedef double value_type; typedef std::string key_type; typedef std::map<key_type,std::vector<value_type> > stats_type; stats_type stats; const int system_size = 20; GeneralizedMinimalResidualMethod<value_type,50> gmres(system_size); // function_type<system_size> F; // value_type x[system_size]; // value_type z[system_size] = {0}; // value_type f0[system_size] = {0}; // std::fill(x,x+system_size,2.0); // F ( x,f0 ); // // std::transform(f0, f0 + system_size,f0,std::negate<value_type>()); // for(int i = 0; i < 2; ++i) // gmres ( F,f0,z,1e-16,&stats ); // // display_stats(stats); return 0; }
/*
 * Interactive driver for the GMRES solver.
 *
 * Reads the exponents (p, q, r) defining N = 2^p * 3^q * 5^r - 1 and the
 * restart / iteration limits from stdin, fills the problem data through
 * initial_x / initial_A / initial_D / source, times the call to gmres() and
 * prints the error of the result against exact_solution().
 *
 * Returns 0 on success, 1 on malformed input, an out-of-range N or an
 * allocation failure.
 */
int main()
{
    /* error() receives N*N as an int element count; 46340 is the largest N
     * whose square still fits (assuming a 32-bit int). */
    const int max_n = 46340;

    int p, q, r, N, max_restart, max_iter;
    int status = 1;
    clock_t t1, t2;
    double size, tol;
    double *A = NULL, *D = NULL, *x = NULL, *b = NULL, *u = NULL;
    size_t n_side, n_bytes;

    printf("\n");
    printf(" Input N = 2^p * 3^q * 5^r - 1, (p, q, r) = ");
    if (scanf("%d %d %d", &p, &q, &r) != 3 || p < 0 || q < 0 || r < 0) {
        fprintf(stderr, " ERROR: p, q and r must be non-negative integers\n");
        return 1;
    }

    /* pow() works in floating point and may return a value marginally below
     * the exact integer, so validate the range first and then round to the
     * nearest integer instead of truncating. The negated comparison also
     * rejects NaN / infinity. */
    size = pow(2, p) * pow(3, q) * pow(5, r);
    if (!(size >= 2.0 && size <= (double)max_n + 1.0)) {
        fprintf(stderr, " ERROR: N = 2^p * 3^q * 5^r - 1 must lie in [1, %d]\n",
                max_n);
        return 1;
    }
    N = (int)(size + 0.5) - 1;

    printf(" Please input max restart times max_restart = ");
    if (scanf("%d", &max_restart) != 1) {
        fprintf(stderr, " ERROR: max_restart must be an integer\n");
        return 1;
    }
    printf(" Please input max iteration times max_iter = ");
    if (scanf("%d", &max_iter) != 1) {
        fprintf(stderr, " ERROR: max_iter must be an integer\n");
        return 1;
    }
    printf("\n N = %d , max_restart = %d , max_iter = %d \n \n", N, max_restart, max_iter);

    /* Size the buffers in size_t so that N*N is not evaluated (and possibly
     * overflowed) in int arithmetic, and make sure the byte count fits. */
    n_side = (size_t)N;
    if (n_side > ((size_t)-1 / sizeof(double)) / n_side) {
        fprintf(stderr, " ERROR: N = %d is too large for this platform\n", N);
        return 1;
    }
    n_bytes = n_side * n_side * sizeof(double);

    A = (double *) malloc(n_bytes);
    D = (double *) malloc(n_bytes);
    x = (double *) malloc(n_bytes);
    b = (double *) malloc(n_bytes);
    u = (double *) malloc(n_bytes);

    if (A == NULL || D == NULL || x == NULL || b == NULL || u == NULL) {
        fprintf(stderr, " ERROR: out of memory for N = %d\n", N);
    } else {
        initial_x(x, N);
        initial_A(A, N);
        initial_D(D, N);
        source(b, N);

        tol = 1.0e-6;
        t1 = clock();
        gmres(A, D, x, b, N, max_restart, max_iter, tol);
        t2 = clock();

        exact_solution(u, N);
        /* printf(" u[%d][%d] = %f \n", N/2, N/2, u[N*N/2+N/2]); */
        /* printf(" x[%d][%d] = %f \n", N/2, N/2, x[N*N/2+N/2]); */
        printf(" error = %e \n", error(x, u, N*N));
        printf(" times = %f \n \n", 1.0*(t2-t1)/CLOCKS_PER_SEC);
        status = 0;
    }

    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    free(A);
    free(D);
    free(x);
    free(b);
    free(u);
    return status;
}
/* * ==================================================================== * Solve the full system of equations including direct and grid parts. * * Note: This routine does not "need to know" about definitions of * grid, sparse matrices, elements etc. * * This is still in TEST PHASE. I need to implement routines for dealing * with boundary conditions (in "elements.c"?) and setting up a * right-hand-side. These data should be passed on by the calling routine! * * Maybe later implement restarted GMRES to decrease memory consumption? * ==================================================================== */ void solveFull(void *directMat, void *interpMat, void *projectMat, void *precondMat, char *precondType, void *grid, double *sourceVector, double *destVector, int matNum, int numColsDirect, int numRowsDirect) { char fctName[] = "solveFull"; int testSolution = 0; /* Perform a final matrix-vector product to * confirm the soltion accuracy. */ int i, iter; double norm1, norm2, tol2; struct fullSolveBundle bundleData[1]; double err, *testVector=NULL, *solutionVectorTmp, *rightHandSideTmp; /* These should really be passed from the calling routine */ double tol = 1.0E-4, *solutionVector, *rightHandSide; int modifyTol=1; /* 1 if tol may be modified based on the RHS norm */ int maxiter = 100; /* Maximum number of iteration in the gmres */ /* Never do more iterations than the number of unknowns */ maxiter = MIN(maxiter, numColsDirect); /* solutionVector = (double*) calloc(numColsDirect,sizeof(double)); rightHandSide = (double*) calloc(numColsDirect,sizeof(double)); */ solutionVector = destVector; rightHandSide = sourceVector; /* JKW DUMMY = unit vector right-hand-side: */ /* fprintf(stderr,"%s: WARNING: Using dummy right-hand-side!\n",fctName); for (i=0; i<numColsDirect; i++) rightHandSide[i] = ((double) rand())/((double) RAND_MAX); for (i=0; i<numColsDirect; i++) rightHandSide[i] = 0.0; rightHandSide[0] = 1.0; */ printf("%s: Solving system with preconditioner: %s\n", 
fctName,precondType); /* Bundle data for the "matrix multiply routine". Use a structure * for the bundle, so that it cannot be messed up easily. */ bundleData->directMat = directMat; bundleData->interpMat = interpMat; bundleData->projectMat = projectMat; bundleData->precondMat = precondMat; bundleData->precondType = precondType; bundleData->numColsDirect = numColsDirect; bundleData->numRowsDirect = numRowsDirect; bundleData->grid = grid; /* Presently, only work on the first matrix. * This *could* be changed at a later point. */ bundleData->matNum = matNum; /* Allocate working vector for the "matrix multiply routine" */ /* Work vectors for eval values for direct and grid parts */ if (strcmp(precondType,"right")==0){ /* Make an extra work vector to store source values */ bundleData->workSrc = (double *) calloc(numColsDirect,sizeof(double)); } else if (strcmp(precondType,"left")==0){ /* Make an extra work vector to store eval values */ bundleData->workEval = (double *) calloc(numRowsDirect,sizeof(double)); } else if (strcmp(precondType,"none")!=0){ /* This is everything else, which is unknown = bad! * Print error and exit */ fprintf(stderr,"%s: ERROR. Unknown preconditioner type\n" " Please use \"none\", \"left\" or \"right\"\n" " Exiting\n", fctName); exit(1); } /* === Call iterative solver === */ /* Possibly repeat the call to the iterative solver. * Note that the memory consumption is O(maxiter^2*N) and the * CPU consumption is O(maxgmres*maxiter^2*N), so it may pay out * to use restarted gmres. 
*/ if (strcmp(precondType,"none")==0){ /* NO precond, solve: Ax = b */ iter = gmres(solutionVector, rightHandSide, numColsDirect, tol, maxiter, fullMatrixMultiply, (void*) bundleData); } else if (strcmp(precondType,"right")==0){ /* RIGHT precond, solve: APy = b */ /* Modify stop criterion based on the 2-norm of the * right-hand-side vector */ for (i=0,norm1=0.0;i<numColsDirect;i++) norm1 += rightHandSide[i]*rightHandSide[i]; norm1 = sqrt(norm1); tol2 = tol; if (norm1>0.0 && modifyTol){ fprintf(stdout,"%s: Scaling tolerance with ||b|| = %g\n", fctName,norm1); tol2 *= norm1; fprintf(stdout, "%s: Aiming for tolerance %g, rather than the specified %g\n", fctName,tol2,tol); } solutionVectorTmp = (double*) calloc(numColsDirect,sizeof(double)); iter = gmres(solutionVectorTmp, rightHandSide, numColsDirect, tol2, maxiter, fullMatrixMultiply, (void*) bundleData); /* Then calculate x = Py. */ spMatMultiply(precondMat, solutionVectorTmp, solutionVector, matNum); FREE(solutionVectorTmp); } else if (strcmp(precondType,"left")==0){ /* LEFT preconditioner - calculate y := Pb. */ rightHandSideTmp = (double*) calloc(numColsDirect,sizeof(double)); spMatMultiply(precondMat,rightHandSide,rightHandSideTmp,matNum); /* Note that rightHandSideTmp and rightHandSide do not have the * same norm. In fact they may differ considerably, so make sure * this is taken into account when selecting the accuracy of the * solution. At the moment I'm slightly at a loss on how to do * this accurately. For now, the tolerance will be scaled by * ||Pb||/||b||, where P is the preconditioner and b is the right * hand side (Pb is the new right hand side). 
* Issue a warning statement about this */ fprintf(stderr,"%s: WARNING: Scaling tolerance with ||Pb||/||b||!\n", fctName); for (i=0,norm1=0.0,norm2=0.0;i<numColsDirect;i++){ norm1 += rightHandSide[i]*rightHandSide[i]; norm2 += rightHandSideTmp[i]*rightHandSideTmp[i]; } norm1 = sqrt(norm1); norm2 = sqrt(norm2); if(norm1>0.0 && modifyTol){ tol2 = tol*norm2/norm1; fprintf(stderr, "%s: Aiming for tolerance %g, rather than the specified %g!\n", fctName,tol2,tol); } else tol2 = tol; /* Now solve PAx = y */ iter = gmres(solutionVector, rightHandSideTmp, numColsDirect, tol2, maxiter, fullMatrixMultiply, (void*) bundleData); FREE(rightHandSideTmp); } printf("%s: GMRES used %d iterations\n",fctName,iter); /* Test the solution making a matrix-vector product * (no preconditioning) A*x and comparing with the right-hand-side */ if (testSolution) { bundleData->precondType = "none"; testVector = (double*) calloc(numColsDirect,sizeof(double)); fullMatrixMultiply(solutionVector,testVector,numColsDirect, (void*)bundleData); for (i=0, err=0.0; i<numColsDirect; i++) err += (testVector[i]-rightHandSide[i]) *(testVector[i]-rightHandSide[i]); err=sqrt(err); printf(" Estimated ||err||: %g\n",err); } /* TEST SECTION */ if (0){ if (0){ for (i=0; i<numColsDirect; i++){ printf("%g %g\n",solutionVector[i],rightHandSide[i]); } } /*dumpSparseMatrixTranspose(precondMat,0);*/ if (0){ int j; for (j=0;j<numColsDirect;j++){ for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0; testVector[j]=1; fullMatrixMultiply(testVector,solutionVector,numColsDirect, (void*)bundleData); printf("Column %d: \n",j); for (i=0; i<numColsDirect; i++) printf(" %12.6e",solutionVector[i]); printf("\n"); } } if (0){/* Dump transpose matrix to file: */ int j; FILE *dumpFile; dumpFile = fopen("mat.out","w"); printf("Writing to mat.out\n"); for (j=0;j<numColsDirect;j++){ for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0; testVector[j]=1; fullMatrixMultiply(testVector,solutionVector,numColsDirect, 
(void*)bundleData); for (i=0; i<numColsDirect; i++) fprintf(dumpFile," %15.8e",solutionVector[i]); fprintf(dumpFile,"\n"); } fclose(dumpFile); } if (0){ /* Calculate and write A*e_1: */ int j; FILE *dumpFile; dumpFile = fopen("a_times_e1.out","w"); j=0; for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0; testVector[j]=1; fullMatrixMultiply(testVector,solutionVector,numColsDirect, (void*)bundleData); printf("Writing to %s\n","a_times_e1.out"); for (i=0; i<numColsDirect; i++) fprintf(dumpFile," %12.6e\n",solutionVector[i]); } } /* Test a preconditioner. For a small system P*A=I, so PA*b=b for (i=0; i<numColsDirect; i++) testVector[i]=solutionVector[i]=0.0; bundleData->precondType = "none"; testVector = (double*) calloc(numColsDirect,sizeof(double)); fullMatrixMultiply(rightHandSide,testVector,numColsDirect, (void*)bundleData); spMatMultiply(precondMat,testVector,solutionVector,matNum); for (i=0; i<numColsDirect; i++) printf(" %16.8e %16.8e %16.8e \n",rightHandSide[i],solutionVector[i],testVector[i]); */ /* Free working vector and other stuff used. */ IFFREE(testVector); /*FREE(solutionVector); FREE(rightHandSide);*/ if (strcmp(precondType,"right")==0){ FREE(bundleData->workSrc); } else if (strcmp(precondType,"left")==0){ FREE(bundleData->workEval); } return; /* Should probably return (a pointer to) the solution vector? */ } /* End of routine solveFull */
//============================================================================= int main(int argc, char** argv) { /////////////////////////////////////////////// //////////////// set precision //////////////// /////////////////////////////////////////////// std::cout.precision(9); std::cout.setf(std::ios::scientific, std::ios::floatfield); std::cout.setf(std::ios::showpos); std::cerr.precision(3); std::cerr.setf(std::ios::scientific, std::ios::floatfield); std::cerr.setf(std::ios::showpos); /////////////////////////////////////////////// //////////////////// A,b ////////////////////// /////////////////////////////////////////////// CPPL::dgsmatrix A; CPPL::dcovector b; //const bool file =true; const bool file =false; /////////////////////////// ////////// read /////////// /////////////////////////// if(file){ A.read("A8.dge"); b.read("b8.dco"); //A.read("A10.dge"); b.read("b10.dco"); //A.read("A44.dge"); b.read("b44.dco"); } /////////////////////////// ///////// random ////////// /////////////////////////// else{//file==false std::cerr << "# making random matrix" << std::endl; const int size(1000); A.resize(size,size); b.resize(size); srand(time(NULL)); for(int i=0; i<size; i++){ for(int j=0; j<size; j++){ if( rand()%5==0 ){ A(i,j) =(double(rand())/double(RAND_MAX))*2.0 -1.0; } } A(i,i) +=1e-2*double(size);//generalization b(i) =(double(rand())/double(RAND_MAX))*1. 
-0.5; } A.write("A.dge"); b.write("b.dco"); } /////////////////////////////////////////////// ////////////////// direct ///////////////////// /////////////////////////////////////////////// std::cerr << "# making solution with dgesv" << std::endl; CPPL::dgematrix A2(A.to_dgematrix()); CPPL::dcovector b2(b); A2.dgesv(b2); b2.write("ans.dco"); /////////////////////////////////////////////// ///////////////// iterative /////////////////// /////////////////////////////////////////////// //////// initial x //////// CPPL::dcovector x(b.l); //x.read("x_ini8.dco"); x.zero(); //////// eps //////// double eps(fabs(damax(b))*1e-6); std::cerr << "eps=" << eps << std::endl; //////// solve //////// if( gmres(A, b, x, eps) ){ std::cerr << "failed." << std::endl; x.write("x.dco"); exit(1); } x.write("x.dco"); std::cerr << "fabs(damax(err))=" << fabs(damax(b2-x)) << std::endl; return 0; }
/*
 * Jacobi-Davidson eigensolver for a hermitian operator (block version).
 *
 * Parameters, as used in this routine:
 *   n, lda       vector length in complex numbers and the column stride of
 *                V, Q, V0 and the residual block.
 *   tau          shift used in the correction equation while the residual is
 *                not yet small; updated from converged eigenvalues when
 *                shift_mode == 1.
 *   tol          a Ritz pair counts as converged when its residual norm is
 *                below tol.
 *   kmax         number of eigenpairs to compute.
 *   jmax, jmin   maximal search-space size and size kept after a restart.
 *   itmax        maximal number of outer iterations.
 *   blksize      number of Ritz pairs treated per outer iteration.
 *   blkwise      1: strip off only whole converged blocks,
 *                0: strip off every converged pair.
 *   V0dim, V0    number of start vectors and the start vectors themselves;
 *                random vectors are added if V0dim < blksize.
 *   solver_flag  linear solver for the correction equation (GMRES, CGS,
 *                BICGSTAB, CG; anything else falls back to gmres).
 *   linitmax     iteration budget of the linear solver.
 *   eps_tr       residual threshold below which the Ritz value may replace
 *                tau as shift.
 *   toldecay     base of the decreasing linear-solver tolerance (> 1).
 *   verbosity    amount of printed information.
 *   k_conv       in: if > 0, number of already converged pairs;
 *                out: number of converged pairs.
 *   Q, lambda    out: eigenvector approximations (columns of stride lda) and
 *                eigenvalues, in order of convergence.
 *   it           out: number of outer iterations performed.
 *   maxmin       non-zero: maximal eigenvalues, zero: minimal eigenvalues.
 *   shift_mode   1: move tau towards the converged eigenvalues.
 *   A_psi        operator application, called as A_psi(out, in).
 *
 * Invalid parameters and failed allocations are reported through
 * jderrorhandler.
 */
void jdher(int n, int lda, double tau, double tol,
           int kmax, int jmax, int jmin, int itmax,
           int blksize, int blkwise,
           int V0dim, _Complex double *V0,
           int solver_flag,
           int linitmax, double eps_tr, double toldecay,
           int verbosity,
           int *k_conv, _Complex double *Q, double *lambda, int *it,
           int maxmin, int shift_mode,
           matrix_mult A_psi) {

  /****************************************************************************
   *                                                                          *
   * Local variables                                                          *
   *                                                                          *
   ****************************************************************************/

  /* allocatables:
   * initialize with NULL, so we can free even unallocated ptrs */
  double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;

  _Complex double *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL,
    *Res_ = NULL, *Res,
    *eigwork = NULL, *temp1_ = NULL, *temp1;

  int *idx1 = NULL, *idx2 = NULL,
    *convind = NULL, *keepind = NULL, *solvestep = NULL,
    *actcorrits = NULL;

  /* non-allocated ptrs */
  _Complex double *q, *v, *u, *r = NULL;
  /* _Complex double *matdummy, *vecdummy; */

  /* scalar vars */
  double theta, alpha, it_tol;

  int i, k, j, actblksize, eigworklen, found, conv, keep, n2;
  int act, cnt, idummy, info, CntCorrIts=0, endflag=0;
  /* n complex numbers expressed as a count of spinors */
  int N = n*sizeof(_Complex double)/sizeof(spinor);

  /* variables for random number generator */
  int IDIST = 1;
  int ISEED[4] = {2, 3, 5, 7};
  ISEED[0] = g_proc_id+2;

  /****************************************************************************
   *                                                                          *
   * Execution starts here...                                                 *
   *                                                                          *
   ****************************************************************************/

  /* print info header */
  if ((verbosity > 2) && (g_proc_id == 0)){
    printf("Jacobi-Davidson method for hermitian Matrices\n");
    printf("Solving A*x = lambda*x \n\n");
    printf(" N= %10d ITMAX=%4d\n", n, itmax);
    printf(" KMAX=%3d JMIN=%3d JMAX=%3d V0DIM=%3d\n", kmax, jmin, jmax, V0dim);
    printf(" BLKSIZE= %2d BLKWISE= %5s\n", blksize, blkwise ? "TRUE" : "FALSE");
    printf(" TOL= %11.4e TAU= %11.4e\n", tol, tau);
    printf(" LINITMAX= %5d EPS_TR= %10.3e TOLDECAY=%9.2e\n", linitmax, eps_tr, toldecay);
    printf("\n Computing %s eigenvalues\n", maxmin ? "maximal" : "minimal");
    printf("\n");
    fflush( stdout );
  }

  /* validate input parameters */
  if(tol <= 0) jderrorhandler(401,"");
  if(kmax <= 0 || kmax > n) jderrorhandler(402,"");
  if(jmax <= 0 || jmax > n) jderrorhandler(403,"");
  if(jmin <= 0 || jmin > jmax) jderrorhandler(404,"");
  if(itmax < 0) jderrorhandler(405,"");
  if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,"");
  if(blksize <= 0 || blksize > kmax) jderrorhandler(406,"");
  if(blkwise < 0 || blkwise > 1) jderrorhandler(407,"");
  if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,"");
  if(linitmax < 0) jderrorhandler(409,"");
  if(eps_tr < 0.) jderrorhandler(500,"");
  if(toldecay <= 1.0) jderrorhandler(501,"");

  CONE = 1.;
  CZERO = 0.;
  CMONE = -1.;

  /* Get hardware-dependent values:
   * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
   * block size... */
  eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;

  /* Allocating memory for matrices & vectors */

  if((void*)(V_ = (_Complex double *)malloc((lda * jmax + 4) * sizeof(_Complex double))) == NULL) {
    errno = 0;
    jderrorhandler(300,"V in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  V = (_Complex double*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  V = V_;
#endif
  if((void*)(U = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"U in jdher");
  }
  if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) {
    jderrorhandler(300,"s in jdher");
  }
  if((void*)(Res_ = (_Complex double *)malloc((lda * blksize+4) * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"Res in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  Res = (_Complex double*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  Res = Res_;
#endif
  if((void*)(resnrm = (double *)malloc(blksize * sizeof(double))) == NULL) {
    jderrorhandler(300,"resnrm in jdher");
  }
  if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) {
    jderrorhandler(300,"resnrm_old in jdher");
  }
  if((void*)(M = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"M in jdher");
  }
  if((void*)(Vtmp = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"Vtmp in jdher");
  }
  if((void*)(p_work = (_Complex double *)malloc(lda * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"p_work in jdher");
  }

  /* ... */
  if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) {
    jderrorhandler(300,"idx1 in jdher");
  }
  if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) {
    jderrorhandler(300,"idx2 in jdher");
  }

  /* Indices for (non-)converged approximations */
  if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"convind in jdher");
  }
  if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"keepind in jdher");
  }
  if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"solvestep in jdher");
  }
  if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"actcorrits in jdher");
  }

  if((void*)(eigwork = (_Complex double *)malloc(eigworklen * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"eigwork in jdher");
  }
  if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) {
    jderrorhandler(300,"rwork in jdher");
  }
  if((void*)(temp1_ = (_Complex double *)malloc((lda+4) * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"temp1 in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  temp1 = (_Complex double*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  temp1 = temp1_;
#endif
  if((void*)(dtemp = (double *)malloc(lda * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"dtemp in jdher");
  }

  /* Set variables for Projection routines */
  n2 = 2*n;
  p_n = n;
  p_n2 = n2;
  p_Q = Q;
  p_A_psi = A_psi;
  p_lda = lda;

  /**************************************************************************
   *                                                                        *
   * Generate initial search subspace V. Vectors are taken from V0 and if   *
   * necessary randomly generated.                                          *
   *                                                                        *
   **************************************************************************/

  /* copy V0 to V */
  _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1);
  j = V0dim;
  /* if V0dim < blksize: generate additional random vectors */
  if (V0dim < blksize) {
    idummy = (blksize - V0dim)*n; /* nof random numbers */
    _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda);
    j = blksize;
  }
  /* Orthogonalise each column against the previous ones and normalise it */
  for (cnt = 0; cnt < j; cnt ++) {
    ModifiedGS(V + cnt*lda, n, cnt, V, lda);
    alpha = sqrt(square_norm((spinor*)(V+cnt*lda), N, 1));
    alpha = 1.0 / alpha;
    _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
  }

  /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle
     is computed. */
  for (cnt = 0; cnt < j; cnt++){
    A_psi((spinor*) temp1, (spinor*)(V+cnt*lda));
    idummy = cnt+1;
    for(i = 0; i < idummy; i++){
      M[cnt*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1);
    }
  }

  /* Other initializations */
  k = 0; (*it) = 0;
  if((*k_conv) > 0) {
    k = *k_conv;
  }

  actblksize = blksize;
  for(act = 0; act < blksize; act ++){
    solvestep[act] = 1;
  }

  /****************************************************************************
   *                                                                          *
   * Main JD-iteration loop                                                   *
   *                                                                          *
   ****************************************************************************/

  while((*it) < itmax) {

    /****************************************************************************
     *                                                                          *
     * Solving the projected eigenproblem                                       *
     *                                                                          *
     * M*u = V^dagger*A*V*u = s*u                                               *
     * M is hermitian, only the upper triangle is stored                        *
     *                                                                          *
     ****************************************************************************/
    _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1);
    _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1);

    if (info != 0) {
      printf("error solving the projected eigenproblem.");
      printf(" zheev: info = %d\n", info);
    }
    if(info != 0) jderrorhandler(502,"proble in zheev");

    /* Reverse order of eigenvalues if maximal value is needed */
    if(maxmin == 1){
      sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0);
    }
    else{
      sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0);
    }

    /****************************************************************************
     *                                                                          *
     * Convergence/Restart Check                                                *
     *                                                                          *
     * In case of convergence, strip off a whole block or just the converged    *
     * ones and put 'em into Q. Update the matrices Q, V, U, s                  *
     *                                                                          *
     * In case of a restart update the V, U and M matrices and recompute the    *
     * Eigenvectors                                                             *
     *                                                                          *
     ****************************************************************************/

    found = 1;
    while(found) {

      /* conv/keep = Number of converged/non-converged Approximations */
      conv = 0; keep = 0;

      for(act=0; act < actblksize; act++){

        /* Setting pointers for single vectors */
        q = Q + (act+k)*lda;
        u = U + act*jmax;
        r = Res + act*lda;

        /* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */
        theta = s[act];
        _FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1);

        /* Compute the residual */
        A_psi((spinor*) r, (spinor*) q);
        theta = -theta;
        _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);

        /* Compute norm of the residual and update arrays convind/keepind*/
        resnrm_old[act] = resnrm[act];
        resnrm[act] = sqrt(square_norm((spinor*) r, N, 1));
        if (resnrm[act] < tol){
          convind[conv] = act;
          conv = conv + 1;
        }
        else{
          keepind[keep] = act;
          keep = keep + 1;
        }

      } /* for(act = 0; act < actblksize; act ++) */

      /* Check whether the blkwise-mode is chosen and ALL the
         approximations converged, or whether the strip-off mode is
         active and SOME of the approximations converged */
      found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0))
        && (j > actblksize || k == kmax - actblksize);

      /***************************************************************************
       *                                                                         *
       * Convergence Case                                                        *
       *                                                                         *
       * In case of convergence, strip off a whole block or just the converged   *
       * ones and put 'em into Q. Update the matrices Q, V, U, s                 *
       *                                                                         *
       **************************************************************************/

      if (found) {

        /* Store Eigenvalues */
        for(act = 0; act < conv; act++)
          lambda[k+act] = s[convind[act]];

        /* Re-use non approximated Ritz-Values */
        for(act = 0; act < keep; act++)
          s[act] = s[keepind[act]];

        /* Shift the others in the right position */
        for(act = 0; act < (j-actblksize); act ++)
          s[act+keep] = s[act+actblksize];

        /* Update V. Re-use the V-Vectors not looked at yet. */
        idummy = j - actblksize;
        for (act = 0; act < n; act = act + jmax) {
          cnt = act + jmax > n ? n-act : jmax;
          _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1);
          _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp,
                     &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1);
        }

        /* Insert the not converged approximations as first columns in V */
        for(act = 0; act < keep; act++){
          _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1);
        }

        /* Store Eigenvectors */
        for(act = 0; act < conv; act++){
          _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1);
        }

        /* Update SearchSpaceSize j */
        j = j - conv;

        /* Let M become a diagonalmatrix with the Ritzvalues as entries ... */
        _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
        for (act = 0; act < j; act++)
          M[act*jmax + act] = s[act];

        /* ... and U the Identity(jnew,jnew) */
        _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);

        if(shift_mode == 1){
          if(maxmin == 0){
            for(act = 0; act < conv; act ++){
              if (lambda[k+act] > tau){
                tau = lambda[k+act];
              }
            }
          }
          else{
            for(act = 0; act < conv; act ++){
              if (lambda[k+act] < tau){
                tau = lambda[k+act];
              }
            }
          }
        }

        /* Update Converged-Eigenpair-counter and Pro_k */
        k = k + conv;

        /* Update the new blocksize */
        actblksize=min(blksize, kmax-k);

        /* Exit main iteration loop when kmax eigenpairs have been
           approximated */
        if (k == kmax){
          endflag = 1;
          break;
        }

        /* Counter for the linear-solver-accuracy */
        for(act = 0; act < keep; act++)
          solvestep[act] = solvestep[keepind[act]];

        /* Now we expect to have the next eigenvalues */
        /* allready with some accuracy */
        /* So we do not need to start from scratch... */
        for(act = keep; act < blksize; act++)
          solvestep[act] = 1;

      } /* if(found) */

      if(endflag == 1){
        break;
      }

      /**************************************************************************
       *                                                                        *
       * Restart                                                                *
       *                                                                        *
       * The Eigenvector-Aproximations corresponding to the first jmin          *
       * Petrov-Vectors are kept.                                               *
       *                                                                        *
       **************************************************************************/
      if (j+actblksize > jmax) {

        idummy = j; j = jmin;

        for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */
          cnt = act+jmax > n ? n-act : jmax;
          _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1);
          _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp,
                     &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1);
        }

        _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
        _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
        for (act = 0; act < j; act++)
          M[act*jmax + act] = s[act];
      }

    } /* while(found) */

    if(endflag == 1){
      break;
    }

    /****************************************************************************
     *                                                                          *
     * Solving the correction equations                                         *
     *                                                                          *
     ****************************************************************************/

    /* Solve actblksize times the correction equation ... */
    for (act = 0; act < actblksize; act ++) {

      /* Setting start-value for vector v as zeros(n,1). Guarantees
         orthogonality */
      v = V + j*lda;
      for (cnt = 0; cnt < n; cnt ++){
        v[cnt] = 0.;
      }

      /* Adaptive accuracy and shift for the lin.solver. In case the
         residual is big, we don't need a too precise solution for the
         correction equation, since even in exact arithmetic the
         solution wouldn't be too usefull for the Eigenproblem. */
      r = Res + act*lda;

      if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){
        p_theta = s[act];
      }
      else{
        p_theta = tau;
      }
      p_k = k + actblksize;

      /* if we are in blockwise mode, we do not want to */
      /* iterate solutions much more, if they have */
      /* allready the desired precision */
      if(blkwise == 1 && resnrm[act] < tol) {
        it_tol = pow(toldecay, (double)(-5));
      }
      else {
        it_tol = pow(toldecay, (double)(-solvestep[act]));
      }
      solvestep[act] = solvestep[act] + 1;

      /* equation and project if necessary */
      ModifiedGS(r, n, k + actblksize, Q, lda);

      /* Solve the correction equation ...
       * Exactly one solver runs per correction equation: the branches form
       * a single if / else-if chain, so the GMRES case no longer falls
       * through into the default gmres call as well. */
      g_sloppy_precision = 1;
      if(solver_flag == GMRES){
        /* info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, &Proj_A_psi, &Proj_A_psi); */
        info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, 0,
                     N, 1, &Proj_A_psi);
      }
      else if(solver_flag == CGS){
        info = cgs_real((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
                        N, &Proj_A_psi);
      }
      else if (solver_flag == BICGSTAB){
        info = bicgstab_complex((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
                                N, &Proj_A_psi);
      }
      else if (solver_flag == CG){
        info = cg_her((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
                      N, &Proj_A_psi);
      }
      else{
        /* Unknown solver_flag: fall back to gmres */
        info = gmres((spinor*) v, (spinor*) r, 10, linitmax, it_tol*it_tol, 0,
                     N, 1, &Proj_A_psi);
      }
      g_sloppy_precision = 0;

      /* Actualizing profiling data */
      if (info == -1){
        CntCorrIts += linitmax;
      }
      else{
        CntCorrIts += info;
      }
      actcorrits[act] = info;

      /* orthonormalize v to Q, cause the implicit
         orthogonalization in the solvers may be too inaccurate. Then
         apply "IteratedCGS" to prevent numerical breakdown
         in order to orthogonalize v to V */

      ModifiedGS(v, n, k+actblksize, Q, lda);
      IteratedClassicalGS(v, &alpha, n, j, V, temp1, lda);

      alpha = 1.0 / alpha;
      _FT(dscal)(&n2, &alpha, (double*) v, &ONE);

      /* update interaction matrix M */
      A_psi((spinor*) temp1, (spinor*) v);
      idummy = j+1;
      for(i = 0; i < idummy; i++) {
        M[j*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1);
      }

      /* Increasing SearchSpaceSize j */
      j ++;
    } /* for (act = 0;act < actblksize; act ++) */

    /* Print information line */
    if(g_proc_id == 0) {
      print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize,
                   s, resnrm, actcorrits);
    }

    /* Increase iteration-counter for outer loop */
    (*it) = (*it) + 1;

  } /* Main iteration loop */

  /******************************************************************
   *                                                                *
   * Eigensolutions converged or iteration limit reached            *
   *                                                                *
   * Print statistics. Free memory. Return.                         *
   *                                                                *
   ******************************************************************/

  (*k_conv) = k;
  if (g_proc_id == 0 && verbosity > 0) {
    printf("\nJDHER execution statistics\n\n");
    /* Guard the average against zero outer iterations */
    printf("IT_OUTER=%d IT_INNER_TOT=%d IT_INNER_AVG=%8.2f\n",
           (*it), CntCorrIts, (*it) > 0 ? (double)CntCorrIts/(*it) : 0.0);
    printf("\nConverged eigensolutions in order of convergence:\n");
    printf("\n I LAMBDA(I) RES(I)\n");
    printf("---------------------------------------\n");
  }

  /* Use the start of the residual block as scratch space. Without this, r
   * is still NULL when the main loop never ran (itmax == 0 with
   * *k_conv > 0) and A_psi would write through a null pointer. */
  r = Res;
  for (act = 0; act < *k_conv; act ++) {
    /* Compute the residual for solution act */
    q = Q + act*lda;
    theta = -lambda[act];
    A_psi((spinor*) r, (spinor*) q);
    _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
    alpha = sqrt(square_norm((spinor*) r, N, 1));
    if(g_proc_id == 0 && verbosity > 0) {
      printf("%3d %22.15e %12.5e\n", act+1, lambda[act], alpha);
    }
  }
  if(g_proc_id == 0 && verbosity > 0) {
    printf("\n");
    fflush( stdout );
  }

  free(V_); free(Vtmp); free(U);
  free(s); free(Res_);
  free(resnrm); free(resnrm_old);
  free(M); free(Z);
  free(eigwork); free(temp1_);
  free(dtemp); free(rwork);
  free(p_work);
  free(idx1); free(idx2);
  free(convind); free(keepind); free(solvestep); free(actcorrits);

} /* jdher(.....) */
void RBEC::stepForward() { int i, j, k, l; int n_dof = fem_space.n_dof(); int n_total_dof = 2 * n_dof; mat_RBEC.reinit(sp_RBEC); mat_rere.reinit(sp_rere); mat_reim.reinit(sp_reim); mat_imre.reinit(sp_imre); mat_imim.reinit(sp_imim); Vector<double> phi(n_total_dof); FEMFunction <double, DIM> phi_star(fem_space); Vector<double> rhs(n_total_dof); Potential V(gamma_x, gamma_y); /// 准备一个遍历全部单元的迭代器. FEMSpace<double, DIM>::ElementIterator the_element = fem_space.beginElement(); FEMSpace<double, DIM>::ElementIterator end_element = fem_space.endElement(); /// 循环遍历全部单元, 只是为了统计每一行的非零元个数. for (; the_element != end_element; ++the_element) { /// 当前单元信息. double volume = the_element->templateElement().volume(); const QuadratureInfo<DIM>& quad_info = the_element->findQuadratureInfo(6); std::vector<double> jacobian = the_element->local_to_global_jacobian(quad_info.quadraturePoint()); int n_quadrature_point = quad_info.n_quadraturePoint(); std::vector<AFEPack::Point<DIM> > q_point = the_element->local_to_global(quad_info.quadraturePoint()); /// 单元信息. std::vector<std::vector<std::vector<double> > > basis_gradient = the_element->basis_function_gradient(q_point); std::vector<std::vector<double> > basis_value = the_element->basis_function_value(q_point); std::vector<double> phi_re_value = phi_re.value(q_point, *the_element); std::vector<double> phi_im_value = phi_im.value(q_point, *the_element); const std::vector<int>& element_dof = the_element->dof(); int n_element_dof = the_element->n_dof(); /// 实际拼装. 
for (l = 0; l < n_quadrature_point; ++l) { double Jxw = quad_info.weight(l) * jacobian[l] * volume; for (j = 0; j < n_element_dof; ++j) { for (k = 0; k < n_element_dof; ++k) { double cont = Jxw * ((1 / dt) * basis_value[j][l] * basis_value[k][l] + 0.5 * innerProduct(basis_gradient[j][l], basis_gradient[k][l]) + V.value(q_point[l]) * basis_value[j][l] * basis_value[k][l] + beta * (phi_re_value[l] * phi_re_value[l] + phi_im_value[l] * phi_im_value[l]) * basis_value[j][l] * basis_value[k][l]); mat_RBEC.add(element_dof[j], element_dof[k], cont); mat_RBEC.add(element_dof[j] + n_dof, element_dof[k] + n_dof, cont); } rhs(element_dof[j]) += Jxw * phi_re_value[l] * basis_value[j][l] / dt; rhs(element_dof[j] + n_dof) += Jxw * phi_im_value[l] * basis_value[j][l] / dt; } } } FEMFunction<double, DIM> _phi_re(phi_re); FEMFunction<double, DIM> _phi_im(phi_im); boundaryValue(phi, rhs, mat_RBEC); // AMGSolver solver(mat_RBEC); // solver.solve(phi, rhs); dealii::SolverControl solver_control(4000, 1e-15); SolverGMRES<Vector<double> >::AdditionalData para(500, false, true); SolverGMRES<Vector<double> > gmres(solver_control, para); gmres.solve(mat_RBEC, phi, rhs, PreconditionIdentity()); for (int i = 0; i < n_dof; ++i) { phi_re(i) = phi(i); phi_im(i) = phi(n_dof + i); } for (int i = 0; i < n_dof; ++i) phi_star(i) = sqrt(phi_re(i) * phi_re(i) + phi_im(i) * phi_im(i)); double L2Phi = Functional::L2Norm(phi_re, 6); std::cout << "L2 norm = " << L2Phi << std::endl; for (int i = 0; i < n_dof; ++i) { phi_re(i) /= L2Phi; phi_im(i) /= L2Phi; } double e = energy(phi_re, phi_im, 6); std::cout << "Energy = " << e << std::endl; t += dt; };
/*
 * Fallback used when there is no lapack: forwards to the plain gmres.
 *
 * nr_ev is accepted for interface compatibility but not used here; all
 * other arguments are passed through unchanged, with the additional
 * gmres argument fixed to 1.
 * Returns whatever gmres returns.
 */
int gmres_dr(spinor * const P,spinor * const Q,
             const int m, const int nr_ev, const int max_restarts,
             const double eps_sq, const int rel_prec,
             const int N, matrix_mult f){
  const int result = gmres(P, Q, m, max_restarts, eps_sq, rel_prec, N, 1, f);

  return result;
}