CAMLprim value sunml_spils_modified_gs(value vv, value vh, value vk, value vp) { CAMLparam4(vv, vh, vk, vp); int p = Int_val(vp); int k = Int_val(vk); int i; int i0 = SUNMAX(k-p, 0); realtype new_vk_norm; N_Vector* v; #if SUNDIALS_ML_SAFE == 1 struct caml_ba_array *bh = ARRAY2_DATA(vh); intnat hn = bh->dim[0]; intnat hm = bh->dim[1]; if (hn < k + 1) caml_invalid_argument("modified_gs: h is too small (dim1 < k + 1)."); if (hm < k) caml_invalid_argument("modified_gs: h is too small (dim2 < k)."); if (Wosize_val (vv) < k + 1) caml_invalid_argument("modified_gs: v is too small (< k + 1)."); #endif v = calloc(k + 1, sizeof(N_Vector)); if (v == NULL) caml_raise_out_of_memory(); for (i = i0; i <= k; ++i) v[i] = NVEC_VAL(Field(vv, i)); ModifiedGS(v, ARRAY2_ACOLS(vh), k, p, &new_vk_norm); free(v); CAMLreturn(caml_copy_double(new_vk_norm)); }
int SpgmrSolve(SpgmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, int gstype, real delta, int max_restarts, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, real *res_norm, int *nli, int *nps) { N_Vector *V, xcor, vtemp; real **Hes, *givens, *yg; real s_r0_norm, beta, rotation_product, r_norm, s_product, rho; boole preOnLeft, preOnRight, scale_x, scale_b, converged; int i, j, k, l, l_plus_1, l_max, krydim, ier, ntries; if (mem == NULL) return(SPGMR_MEM_NULL); /* Make local copies of mem variables */ l_max = mem->l_max; V = mem->V; Hes = mem->Hes; givens = mem->givens; xcor = mem->xcor; yg = mem->yg; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize converged flag */ if (max_restarts < 0) max_restarts = 0; if ((pretype != LEFT) && (pretype != RIGHT) && (pretype != BOTH)) pretype = NONE; preOnLeft = ((pretype == LEFT) || (pretype == BOTH)); preOnRight = ((pretype == RIGHT) || (pretype == BOTH)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set vtemp and V[0] to initial (unscaled) residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) { N_VScale(ONE, b, vtemp); } else { if (atimes(A_data, x, vtemp) != 0) return(SPGMR_ATIMES_FAIL); N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); } N_VScale(ONE, vtemp, V[0]); /* Apply b-scaling to vtemp, get L2 norm of sb r_0, and return if small */ /* if (scale_b) N_VProd(sb, vtemp, vtemp); s_r0_norm = RSqrt(N_VDotProd(vtemp, vtemp)); if (s_r0_norm <= delta) return(SPGMR_SUCCESS); */ /* Apply left preconditioner and b-scaling to V[0] = r_0 */ if (preOnLeft) { ier = psolve(P_data, V[0], vtemp, LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, V[0], vtemp); } if (scale_b) { N_VProd(sb, vtemp, V[0]); } else { N_VScale(ONE, vtemp, V[0]); } /* Set r_norm = beta to L2 norm of V[0] = sb P1_inv r_0, and return if small */ *res_norm = r_norm = beta = RSqrt(N_VDotProd(V[0], V[0])); if (r_norm <= delta) return(SPGMR_SUCCESS); /* Set xcor = 0 */ N_VConst(ZERO, xcor); /* Begin outer iterations: up to (max_restarts + 1) attempts */ for (ntries = 0; ntries <= max_restarts; ntries++) { /* Initialize the Hessenberg matrix Hes and Givens rotation product. Normalize the initial vector V[0]. */ for (i=0; i <= l_max; i++) for (j=0; j < l_max; j++) Hes[i][j] = ZERO; rotation_product = ONE; N_VScale(ONE/r_norm, V[0], V[0]); /* Inner loop: generate Krylov sequence and Arnoldi basis */ for(l=0; l < l_max; l++) { (*nli)++; krydim = l_plus_1 = l + 1; /* Generate A-tilde V[l], where A-tilde = sb P1_inv A P2_inv sx_inv */ /* Apply x-scaling: vtemp = sx_inv V[l] */ if (scale_x) { N_VDiv(V[l], sx, vtemp); } else { N_VScale(ONE, V[l], vtemp); } /* Apply right precoditioner: vtemp = P2_inv sx_inv V[l] */ N_VScale(ONE, vtemp, V[l_plus_1]); if (preOnRight) { ier = psolve(P_data, V[l_plus_1], vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } /* Apply A: V[l+1] = A P2_inv sx_inv V[l] */ if (atimes(A_data, vtemp, V[l_plus_1] ) != 0) return(SPGMR_ATIMES_FAIL); /* Apply left preconditioning: vtemp = P1_inv A P2_inv sx_inv V[l] */ if (preOnLeft) { ier = psolve(P_data, V[l_plus_1], vtemp, LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, V[l_plus_1], vtemp); } /* Apply b-scaling: V[l+1] = sb P1_inv A P2_inv sx_inv V[l] */ if (scale_b) { N_VProd(sb, vtemp, V[l_plus_1]); } else { N_VScale(ONE, vtemp, V[l_plus_1]); } /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */ if (gstype == CLASSICAL_GS) { if (ClassicalGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l]), vtemp, yg) != 0) return(SPGMR_GS_FAIL); } else { if (ModifiedGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l])) != 0) return(SPGMR_GS_FAIL); } /* Update the QR factorization of Hes */ if(QRfact(krydim, Hes, givens, l) != 0 ) return(SPGMR_QRFACT_FAIL); /* Update residual norm estimate; break if convergence test passes */ rotation_product *= givens[2*l+1]; if ((*res_norm = rho = ABS(rotation_product*r_norm)) <= delta) { converged = TRUE; break; } /* Normalize V[l+1] with norm value from the Gram-Schmidt */ N_VScale(ONE/Hes[l_plus_1][l], V[l_plus_1], V[l_plus_1]); } /* Inner loop is done. Compute the new correction vector xcor */ /* Construct g, then solve for y */ yg[0] = r_norm; for (i=1; i <= krydim; i++) yg[i]=ZERO; if (QRsol(krydim, Hes, givens, yg) != 0) return(SPGMR_QRSOL_FAIL); /* Add correction vector V_l y to xcor */ for (k=0; k < krydim; k++) N_VLinearSum(yg[k], V[k], ONE, xcor, xcor); /* If converged, construct the final solution vector x */ if (converged) { /* Apply x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */ if (scale_x) N_VDiv(xcor, sx, xcor); if (preOnRight) { ier = psolve(P_data, xcor, vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, xcor, vtemp); } /* Add correction to initial x to get final solution x, and return */ N_VLinearSum(ONE, x, ONE, vtemp, x); return(SPGMR_SUCCESS); } /* Not yet converged; if allowed, prepare for restart */ if (ntries == max_restarts) break; /* Construct last column of Q in yg */ s_product = ONE; for (i=krydim; i > 0; i--) { yg[i] = s_product*givens[2*i-2]; s_product *= givens[2*i-1]; } yg[0] = s_product; /* Scale r_norm and yg */ r_norm *= s_product; for (i=0; i <= krydim; i++) yg[i] *= r_norm; r_norm = ABS(r_norm); /* Multiply yg by V_(krydim+1) to get last residual vector; restart */ N_VScale(yg[0], V[0], V[0]); for( k=1; k <= krydim; k++) N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]); } /* Failed to converge, even after allowed restarts. If the residual norm was reduced below its initial value, compute and return x anyway. Otherwise return failure flag. */ if (rho < beta) { /* Apply the x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */ if (scale_x) N_VDiv(xcor, sx, xcor); if (preOnRight) { ier = psolve(P_data, xcor, vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, xcor, vtemp); } /* Add vtemp to initial x to get final solution x, and return */ N_VLinearSum(ONE, x, ONE, vtemp, x); return(SPGMR_RES_REDUCED); } return(SPGMR_CONV_FAIL); }
/*---------------------------------------------------------------- Function : SpfgmrSolve ---------------------------------------------------------------*/ int SpfgmrSolve(SpfgmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, int gstype, realtype delta, int max_restarts, int maxit, void *P_data, N_Vector s1, N_Vector s2, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { N_Vector *V, *Z, xcor, vtemp; realtype **Hes, *givens, *yg; realtype beta, rotation_product, r_norm, s_product, rho; booleantype preOnRight, scale1, scale2, converged; int i, j, k, l, l_max, krydim, ier, ntries; if (mem == NULL) return(SPFGMR_MEM_NULL); /* Initialize some variables */ krydim = 0; /* Make local copies of mem variables. */ l_max = mem->l_max; V = mem->V; Z = mem->Z; Hes = mem->Hes; givens = mem->givens; xcor = mem->xcor; yg = mem->yg; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = SUNFALSE; /* Initialize converged flag */ /* If maxit is greater than l_max, then set maxit=l_max */ if (maxit > l_max) maxit = l_max; /* Check for legal value of max_restarts */ if (max_restarts < 0) max_restarts = 0; /* Set preconditioning flag (enabling any preconditioner implies right preconditioning, since FGMRES does not support left preconditioning) */ preOnRight = ((pretype == PREC_RIGHT) || (pretype == PREC_BOTH) || (pretype == PREC_LEFT)); /* Set scaling flags */ scale1 = (s1 != NULL); scale2 = (s2 != NULL); /* Set vtemp to initial (unscaled) residual r_0 = b - A*x_0. */ if (N_VDotProd(x, x) == ZERO) { N_VScale(ONE, b, vtemp); } else { ier = atimes(A_data, x, vtemp); if (ier != 0) return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); } /* Apply left scaling to vtemp = r_0 to fill V[0]. */ if (scale1) { N_VProd(s1, vtemp, V[0]); } else { N_VScale(ONE, vtemp, V[0]); } /* Set r_norm = beta to L2 norm of V[0] = s1 r_0, and return if small */ *res_norm = r_norm = beta = SUNRsqrt(N_VDotProd(V[0], V[0])); if (r_norm <= delta) return(SPFGMR_SUCCESS); /* Initialize rho to avoid compiler warning message */ rho = beta; /* Set xcor = 0. */ N_VConst(ZERO, xcor); /* Begin outer iterations: up to (max_restarts + 1) attempts. */ for (ntries=0; ntries<=max_restarts; ntries++) { /* Initialize the Hessenberg matrix Hes and Givens rotation product. Normalize the initial vector V[0]. */ for (i=0; i<=l_max; i++) for (j=0; j<l_max; j++) Hes[i][j] = ZERO; rotation_product = ONE; N_VScale(ONE/r_norm, V[0], V[0]); /* Inner loop: generate Krylov sequence and Arnoldi basis. */ for (l=0; l<maxit; l++) { (*nli)++; krydim = l + 1; /* Generate A-tilde V[l], where A-tilde = s1 A P_inv s2_inv. */ /* Apply right scaling: vtemp = s2_inv V[l]. */ if (scale2) N_VDiv(V[l], s2, vtemp); else N_VScale(ONE, V[l], vtemp); /* Apply right preconditioner: vtemp = Z[l] = P_inv s2_inv V[l]. */ if (preOnRight) { N_VScale(ONE, vtemp, V[l+1]); ier = psolve(P_data, V[l+1], vtemp, delta, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPFGMR_PSOLVE_FAIL_UNREC : SPFGMR_PSOLVE_FAIL_REC); } N_VScale(ONE, vtemp, Z[l]); /* Apply A: V[l+1] = A P_inv s2_inv V[l]. */ ier = atimes(A_data, vtemp, V[l+1]); if (ier != 0) return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC); /* Apply left scaling: V[l+1] = s1 A P_inv s2_inv V[l]. */ if (scale1) N_VProd(s1, V[l+1], V[l+1]); /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */ if (gstype == CLASSICAL_GS) { if (ClassicalGS(V, Hes, l+1, l_max, &(Hes[l+1][l]), vtemp, yg) != 0) return(SPFGMR_GS_FAIL); } else { if (ModifiedGS(V, Hes, l+1, l_max, &(Hes[l+1][l])) != 0) return(SPFGMR_GS_FAIL); } /* Update the QR factorization of Hes. */ if(QRfact(krydim, Hes, givens, l) != 0 ) return(SPFGMR_QRFACT_FAIL); /* Update residual norm estimate; break if convergence test passes. */ rotation_product *= givens[2*l+1]; *res_norm = rho = SUNRabs(rotation_product*r_norm); if (rho <= delta) { converged = SUNTRUE; break; } /* Normalize V[l+1] with norm value from the Gram-Schmidt routine. */ N_VScale(ONE/Hes[l+1][l], V[l+1], V[l+1]); } /* Inner loop is done. Compute the new correction vector xcor. */ /* Construct g, then solve for y. */ yg[0] = r_norm; for (i=1; i<=krydim; i++) yg[i]=ZERO; if (QRsol(krydim, Hes, givens, yg) != 0) return(SPFGMR_QRSOL_FAIL); /* Add correction vector Z_l y to xcor. */ for (k=0; k<krydim; k++) N_VLinearSum(yg[k], Z[k], ONE, xcor, xcor); /* If converged, construct the final solution vector x and return. */ if (converged) { N_VLinearSum(ONE, x, ONE, xcor, x); return(SPFGMR_SUCCESS); } /* Not yet converged; if allowed, prepare for restart. */ if (ntries == max_restarts) break; /* Construct last column of Q in yg. */ s_product = ONE; for (i=krydim; i>0; i--) { yg[i] = s_product*givens[2*i-2]; s_product *= givens[2*i-1]; } yg[0] = s_product; /* Scale r_norm and yg. */ r_norm *= s_product; for (i=0; i<=krydim; i++) yg[i] *= r_norm; r_norm = SUNRabs(r_norm); /* Multiply yg by V_(krydim+1) to get last residual vector; restart. */ N_VScale(yg[0], V[0], V[0]); for (k=1; k<=krydim; k++) N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]); } /* Failed to converge, even after allowed restarts. If the residual norm was reduced below its initial value, compute and return x anyway. Otherwise return failure flag. */ if (rho < beta) { N_VLinearSum(ONE, x, ONE, xcor, x); return(SPFGMR_RES_REDUCED); } return(SPFGMR_CONV_FAIL); }
void jdher(int n, int lda, double tau, double tol, int kmax, int jmax, int jmin, int itmax, int blksize, int blkwise, int V0dim, _Complex double *V0, int solver_flag, int linitmax, double eps_tr, double toldecay, int verbosity, int *k_conv, _Complex double *Q, double *lambda, int *it, int maxmin, int shift_mode, matrix_mult A_psi) { /**************************************************************************** * * * Local variables * * * ****************************************************************************/ /* constants */ /* allocatables: * initialize with NULL, so we can free even unallocated ptrs */ double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL; _Complex double *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, *temp1_ = NULL, *temp1; int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL, *actcorrits = NULL; /* non-allocated ptrs */ _Complex double *q, *v, *u, *r = NULL; /* _Complex double *matdummy, *vecdummy; */ /* scalar vars */ double theta, alpha, it_tol; int i, k, j, actblksize, eigworklen, found, conv, keep, n2; int act, cnt, idummy, info, CntCorrIts=0, endflag=0; int N = n*sizeof(_Complex double)/sizeof(spinor); /* variables for random number generator */ int IDIST = 1; int ISEED[4] = {2, 3, 5, 7}; ISEED[0] = g_proc_id+2; /**************************************************************************** * * * Execution starts here... * * * ****************************************************************************/ /* print info header */ if ((verbosity > 2) && (g_proc_id == 0)){ printf("Jacobi-Davidson method for hermitian Matrices\n"); printf("Solving A*x = lambda*x \n\n"); printf(" N= %10d ITMAX=%4d\n", n, itmax); printf(" KMAX=%3d JMIN=%3d JMAX=%3d V0DIM=%3d\n", kmax, jmin, jmax, V0dim); printf(" BLKSIZE= %2d BLKWISE= %5s\n", blksize, blkwise ? "TRUE" : "FALSE"); printf(" TOL= %11.4e TAU= %11.4e\n", tol, tau); printf(" LINITMAX= %5d EPS_TR= %10.3e TOLDECAY=%9.2e\n", linitmax, eps_tr, toldecay); printf("\n Computing %s eigenvalues\n", maxmin ? "maximal" : "minimal"); printf("\n"); fflush( stdout ); } /* validate input parameters */ if(tol <= 0) jderrorhandler(401,""); if(kmax <= 0 || kmax > n) jderrorhandler(402,""); if(jmax <= 0 || jmax > n) jderrorhandler(403,""); if(jmin <= 0 || jmin > jmax) jderrorhandler(404,""); if(itmax < 0) jderrorhandler(405,""); if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,""); if(blksize <= 0 || blksize > kmax) jderrorhandler(406,""); if(blkwise < 0 || blkwise > 1) jderrorhandler(407,""); if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,""); if(linitmax < 0) jderrorhandler(409,""); if(eps_tr < 0.) jderrorhandler(500,""); if(toldecay <= 1.0) jderrorhandler(501,""); CONE = 1.; CZERO = 0.; CMONE = -1.; /* Get hardware-dependent values: * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt. * block size... */ eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax; /* Allocating memory for matrices & vectors */ if((void*)(V_ = (_Complex double *)malloc((lda * jmax + 4) * sizeof(_Complex double))) == NULL) { errno = 0; jderrorhandler(300,"V in jdher"); } #if (defined SSE || defined SSE2 || defined SSE3) V = (_Complex double*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE); #else V = V_; #endif if((void*)(U = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"U in jdher"); } if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) { jderrorhandler(300,"s in jdher"); } if((void*)(Res_ = (_Complex double *)malloc((lda * blksize+4) * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"Res in jdher"); } #if (defined SSE || defined SSE2 || defined SSE3) Res = (_Complex double*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE); #else Res = Res_; #endif if((void*)(resnrm = (double *)malloc(blksize * sizeof(double))) == NULL) { jderrorhandler(300,"resnrm in jdher"); } if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) { jderrorhandler(300,"resnrm_old in jdher"); } if((void*)(M = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"M in jdher"); } if((void*)(Vtmp = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"Vtmp in jdher"); } if((void*)(p_work = (_Complex double *)malloc(lda * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"p_work in jdher"); } /* ... */ if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) { jderrorhandler(300,"idx1 in jdher"); } if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) { jderrorhandler(300,"idx2 in jdher"); } /* Indices for (non-)converged approximations */ if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"convind in jdher"); } if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"keepind in jdher"); } if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"solvestep in jdher"); } if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"actcorrits in jdher"); } if((void*)(eigwork = (_Complex double *)malloc(eigworklen * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"eigwork in jdher"); } if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) { jderrorhandler(300,"rwork in jdher"); } if((void*)(temp1_ = (_Complex double *)malloc((lda+4) * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"temp1 in jdher"); } #if (defined SSE || defined SSE2 || defined SSE3) temp1 = (_Complex double*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE); #else temp1 = temp1_; #endif if((void*)(dtemp = (double *)malloc(lda * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"dtemp in jdher"); } /* Set variables for Projection routines */ n2 = 2*n; p_n = n; p_n2 = n2; p_Q = Q; p_A_psi = A_psi; p_lda = lda; /************************************************************************** * * * Generate initial search subspace V. Vectors are taken from V0 and if * * necessary randomly generated. * * * **************************************************************************/ /* copy V0 to V */ _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1); j = V0dim; /* if V0dim < blksize: generate additional random vectors */ if (V0dim < blksize) { idummy = (blksize - V0dim)*n; /* nof random numbers */ _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda); j = blksize; } for (cnt = 0; cnt < j; cnt ++) { ModifiedGS(V + cnt*lda, n, cnt, V, lda); alpha = sqrt(square_norm((spinor*)(V+cnt*lda), N, 1)); alpha = 1.0 / alpha; _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE); } /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle is computed. */ for (cnt = 0; cnt < j; cnt++){ A_psi((spinor*) temp1, (spinor*)(V+cnt*lda)); idummy = cnt+1; for(i = 0; i < idummy; i++){ M[cnt*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1); } } /* Other initializations */ k = 0; (*it) = 0; if((*k_conv) > 0) { k = *k_conv; } actblksize = blksize; for(act = 0; act < blksize; act ++){ solvestep[act] = 1; } /**************************************************************************** * * * Main JD-iteration loop * * * ****************************************************************************/ while((*it) < itmax) { /**************************************************************************** * * * Solving the projected eigenproblem * * * * M*u = V^dagger*A*V*u = s*u * * M is hermitian, only the upper triangle is stored * * * ****************************************************************************/ _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1); _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1); if (info != 0) { printf("error solving the projected eigenproblem."); printf(" zheev: info = %d\n", info); } if(info != 0) jderrorhandler(502,"proble in zheev"); /* Reverse order of eigenvalues if maximal value is needed */ if(maxmin == 1){ sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); } else{ sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); } /**************************************************************************** * * * Convergence/Restart Check * * * * In case of convergence, strip off a whole block or just the converged * * ones and put 'em into Q. Update the matrices Q, V, U, s * * * * In case of a restart update the V, U and M matrices and recompute the * * Eigenvectors * * * ****************************************************************************/ found = 1; while(found) { /* conv/keep = Number of converged/non-converged Approximations */ conv = 0; keep = 0; for(act=0; act < actblksize; act++){ /* Setting pointers for single vectors */ q = Q + (act+k)*lda; u = U + act*jmax; r = Res + act*lda; /* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */ theta = s[act]; _FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1); /* Compute the residual */ A_psi((spinor*) r, (spinor*) q); theta = -theta; _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE); /* Compute norm of the residual and update arrays convind/keepind*/ resnrm_old[act] = resnrm[act]; resnrm[act] = sqrt(square_norm((spinor*) r, N, 1)); if (resnrm[act] < tol){ convind[conv] = act; conv = conv + 1; } else{ keepind[keep] = act; keep = keep + 1; } } /* for(act = 0; act < actblksize; act ++) */ /* Check whether the blkwise-mode is chosen and ALL the approximations converged, or whether the strip-off mode is active and SOME of the approximations converged */ found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) && (j > actblksize || k == kmax - actblksize); /*************************************************************************** * * * Convergence Case * * * * In case of convergence, strip off a whole block or just the converged * * ones and put 'em into Q. Update the matrices Q, V, U, s * * * **************************************************************************/ if (found) { /* Store Eigenvalues */ for(act = 0; act < conv; act++) lambda[k+act] = s[convind[act]]; /* Re-use non approximated Ritz-Values */ for(act = 0; act < keep; act++) s[act] = s[keepind[act]]; /* Shift the others in the right position */ for(act = 0; act < (j-actblksize); act ++) s[act+keep] = s[act+actblksize]; /* Update V. Re-use the V-Vectors not looked at yet. */ idummy = j - actblksize; for (act = 0; act < n; act = act + jmax) { cnt = act + jmax > n ? n-act : jmax; _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1); _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1); } /* Insert the not converged approximations as first columns in V */ for(act = 0; act < keep; act++){ _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1); } /* Store Eigenvectors */ for(act = 0; act < conv; act++){ _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1); } /* Update SearchSpaceSize j */ j = j - conv; /* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1); for (act = 0; act < j; act++) M[act*jmax + act] = s[act]; /* ... and U the Identity(jnew,jnew) */ _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1); if(shift_mode == 1){ if(maxmin == 0){ for(act = 0; act < conv; act ++){ if (lambda[k+act] > tau){ tau = lambda[k+act]; } } } else{ for(act = 0; act < conv; act ++){ if (lambda[k+act] < tau){ tau = lambda[k+act]; } } } } /* Update Converged-Eigenpair-counter and Pro_k */ k = k + conv; /* Update the new blocksize */ actblksize=min(blksize, kmax-k); /* Exit main iteration loop when kmax eigenpairs have been approximated */ if (k == kmax){ endflag = 1; break; } /* Counter for the linear-solver-accuracy */ for(act = 0; act < keep; act++) solvestep[act] = solvestep[keepind[act]]; /* Now we expect to have the next eigenvalues */ /* allready with some accuracy */ /* So we do not need to start from scratch... */ for(act = keep; act < blksize; act++) solvestep[act] = 1; } /* if(found) */ if(endflag == 1){ break; } /************************************************************************** * * * Restart * * * * The Eigenvector-Aproximations corresponding to the first jmin * * Petrov-Vectors are kept. if (j+actblksize > jmax) { * * * **************************************************************************/ if (j+actblksize > jmax) { idummy = j; j = jmin; for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */ cnt = act+jmax > n ? n-act : jmax; _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1); _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1); } _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1); _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1); for (act = 0; act < j; act++) M[act*jmax + act] = s[act]; } } /* while(found) */ if(endflag == 1){ break; } /**************************************************************************** * * * Solving the correction equations * * * * * ****************************************************************************/ /* Solve actblksize times the correction equation ... */ for (act = 0; act < actblksize; act ++) { /* Setting start-value for vector v as zeros(n,1). Guarantees orthogonality */ v = V + j*lda; for (cnt = 0; cnt < n; cnt ++){ v[cnt] = 0.; } /* Adaptive accuracy and shift for the lin.solver. In case the residual is big, we don't need a too precise solution for the correction equation, since even in exact arithmetic the solution wouldn't be too usefull for the Eigenproblem. */ r = Res + act*lda; if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){ p_theta = s[act]; } else{ p_theta = tau; } p_k = k + actblksize; /* if we are in blockwise mode, we do not want to */ /* iterate solutions much more, if they have */ /* allready the desired precision */ if(blkwise == 1 && resnrm[act] < tol) { it_tol = pow(toldecay, (double)(-5)); } else { it_tol = pow(toldecay, (double)(-solvestep[act])); } solvestep[act] = solvestep[act] + 1; /* equation and project if necessary */ ModifiedGS(r, n, k + actblksize, Q, lda); /* Solve the correction equation ... */ g_sloppy_precision = 1; if(solver_flag == GMRES){ /* info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, &Proj_A_psi, &Proj_A_psi); */ info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi); } if(solver_flag == CGS){ info = cgs_real((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi); } else if (solver_flag == BICGSTAB){ info = bicgstab_complex((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi); } else if (solver_flag == CG){ info = cg_her((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi); } else{ info = gmres((spinor*) v, (spinor*) r, 10, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi); } g_sloppy_precision = 0; /* Actualizing profiling data */ if (info == -1){ CntCorrIts += linitmax; } else{ CntCorrIts += info; } actcorrits[act] = info; /* orthonormalize v to Q, cause the implicit orthogonalization in the solvers may be too inaccurate. Then apply "IteratedCGS" to prevent numerical breakdown in order to orthogonalize v to V */ ModifiedGS(v, n, k+actblksize, Q, lda); IteratedClassicalGS(v, &alpha, n, j, V, temp1, lda); alpha = 1.0 / alpha; _FT(dscal)(&n2, &alpha, (double*) v, &ONE); /* update interaction matrix M */ A_psi((spinor*) temp1, (spinor*) v); idummy = j+1; for(i = 0; i < idummy; i++) { M[j*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1); } /* Increasing SearchSpaceSize j */ j ++; } /* for (act = 0;act < actblksize; act ++) */ /* Print information line */ if(g_proc_id == 0) { print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, s, resnrm, actcorrits); } /* Increase iteration-counter for outer loop */ (*it) = (*it) + 1; } /* Main iteration loop */ /****************************************************************** * * * Eigensolutions converged or iteration limit reached * * * * Print statistics. Free memory. Return. * * * ******************************************************************/ (*k_conv) = k; if (g_proc_id == 0 && verbosity > 0) { printf("\nJDHER execution statistics\n\n"); printf("IT_OUTER=%d IT_INNER_TOT=%d IT_INNER_AVG=%8.2f\n", (*it), CntCorrIts, (double)CntCorrIts/(*it)); printf("\nConverged eigensolutions in order of convergence:\n"); printf("\n I LAMBDA(I) RES(I)\n"); printf("---------------------------------------\n"); } for (act = 0; act < *k_conv; act ++) { /* Compute the residual for solution act */ q = Q + act*lda; theta = -lambda[act]; A_psi((spinor*) r, (spinor*) q); _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE); alpha = sqrt(square_norm((spinor*) r, N, 1)); if(g_proc_id == 0 && verbosity > 0) { printf("%3d %22.15e %12.5e\n", act+1, lambda[act], alpha); } } if(g_proc_id == 0 && verbosity > 0) { printf("\n"); fflush( stdout ); } free(V_); free(Vtmp); free(U); free(s); free(Res_); free(resnrm); free(resnrm_old); free(M); free(Z); free(eigwork); free(temp1_); free(dtemp); free(rwork); free(p_work); free(idx1); free(idx2); free(convind); free(keepind); free(solvestep); free(actcorrits); } /* jdher(.....) */
int gmres_dr(spinor * const P,spinor * const Q, const int m, const int nr_ev, const int max_restarts, const double eps_sq, const int rel_prec, const int N, matrix_mult f){ int restart=0, i, j, k, l; double beta, eps, norm, beta2=0.; complex *lswork = NULL; int lwork; complex tmp1, tmp2; int info=0; int _m = m, mp1 = m+1, np1 = nr_ev+1, ne = nr_ev, V2 = 12*(VOLUMEPLUSRAND)/2, _N = 12*N; spinor ** solver_field = NULL; const int nr_sf = 3; if(N == VOLUME) { init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); } else { init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); } double err=0.; spinor * r0, * x0; cmone.re = -1.; cmone.im=0.; cpone.re = 1.; cpone.im=0.; czero.re = 0.; czero.im = 0.; r0 = solver_field[0]; x0 = solver_field[2]; eps=sqrt(eps_sq); init_gmres_dr(m, (VOLUMEPLUSRAND)); norm = sqrt(square_norm(Q, N, 1)); assign(x0, P, N); /* first normal GMRES cycle */ /* r_0=Q-AP (b=Q, x+0=P) */ f(r0, x0); diff(r0, Q, r0, N); /* v_0=r_0/||r_0|| */ alpha[0].re=sqrt(square_norm(r0, N, 1)); err = alpha[0].re; if(g_proc_id == g_stdio_proc && g_debug_level > 0){ printf("%d\t%e true residue\n", restart*m, alpha[0].re*alpha[0].re); fflush(stdout); } if(alpha[0].re==0.){ assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(restart*m); } mul_r(V[0], 1./alpha[0].re, r0, N); for(j = 0; j < m; j++){ /* solver_field[0]=A*v_j */ /* Set h_ij and omega_j */ /* solver_field[1] <- omega_j */ f(solver_field[1], V[j]); /* assign(solver_field[1], solver_field[0], N); */ for(i = 0; i <= j; i++){ H[i][j] = scalar_prod(V[i], solver_field[1], N, 1); /* G, work and work2 are in Fortran storage: columns first */ G[j][i] = H[i][j]; work2[j][i] = H[i][j]; work[i][j].re = H[i][j].re; work[i][j].im = -H[i][j].im; assign_diff_mul(solver_field[1], V[i], H[i][j], N); } _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.); G[j][j+1] = H[j+1][j]; work2[j][j+1] = H[j+1][j]; work[j+1][j].re = H[j+1][j].re; work[j+1][j].im = -H[j+1][j].im; beta2 = H[j+1][j].re*H[j+1][j].re; for(i = 0; i < j; i++){ tmp1 = H[i][j]; tmp2 = H[i+1][j]; _mult_real(H[i][j], tmp2, s[i]); _add_assign_complex_conj(H[i][j], c[i], tmp1); _mult_real(H[i+1][j], tmp1, s[i]); _diff_assign_complex(H[i+1][j], c[i], tmp2); } /* Set beta, s, c, alpha[j],[j+1] */ beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j])); s[j] = H[j+1][j].re / beta; _mult_real(c[j], H[j][j], 1./beta); _complex_set(H[j][j], beta, 0.); _mult_real(alpha[j+1], alpha[j], s[j]); tmp1 = alpha[j]; _mult_assign_complex_conj(alpha[j], c[j], tmp1); /* precision reached? */ if(g_proc_id == g_stdio_proc && g_debug_level > 0){ printf("%d\t%e residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); fflush(stdout); } if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){ _mult_real(alpha[j], alpha[j], 1./H[j][j].re); assign_add_mul(x0, V[j], alpha[j], N); for(i = j-1; i >= 0; i--){ for(k = i+1; k <= j; k++){ _mult_assign_complex(tmp1, H[i][k], alpha[k]); /* alpha[i] -= tmp1 */ _diff_complex(alpha[i], tmp1); } _mult_real(alpha[i], alpha[i], 1./H[i][i].re); assign_add_mul(x0, V[i], alpha[i], N); } for(i = 0; i < m; i++){ alpha[i].im = 0.; } assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(restart*m+j); } /* if not */ else { mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N); } } j=m-1; /* prepare for restart */ _mult_real(alpha[j], alpha[j], 1./H[j][j].re); assign_add_mul(x0, V[j], alpha[j], N); if(g_proc_id == 0 && g_debug_level > 3) { printf("alpha: %e %e\n", alpha[j].re, alpha[j].im); } for(i = j-1; i >= 0; i--){ for(k = i+1; k <= j; k++){ _mult_assign_complex(tmp1, H[i][k], alpha[k]); _diff_complex(alpha[i], tmp1); } _mult_real(alpha[i], alpha[i], 1./H[i][i].re); if(g_proc_id == 0 && g_debug_level > 3) { printf("alpha: %e %e\n", alpha[i].re, alpha[i].im); } assign_add_mul(x0, V[i], alpha[i], N); } /* This produces c=V_m+1*r0 */ for(i = 0; i < mp1; i++) { c[i] = scalar_prod(V[i], r0, N, 1); if(g_proc_id == 0 && g_debug_level > 3) { printf("c: %e %e err = %e\n", c[i].re, c[i].im, err); } } for(restart = 1; restart < max_restarts; restart++) { /* compute c-\bar H \alpha */ _FT(zgemv) ("N", &mp1, &_m, &cmone, G[0], &mp1, alpha, &one, &cpone, c, &one, 1); err = sqrt(short_scalar_prod(c, c, mp1).re); if(g_proc_id == 0 && g_debug_level > 0) { printf("%d\t %e short residue\n", m*restart, err*err); } /* Compute new residual r0 */ /* r_0=Q-AP (b=Q, x+0=P) */ if(g_debug_level > 0) { f(r0, x0); diff(r0, Q, r0, N); tmp1.im=sqrt(square_norm(r0, N, 1)); if(g_proc_id == g_stdio_proc){ printf("%d\t%e true residue\n", m*restart, tmp1.im*tmp1.im); fflush(stdout); } } mul(r0, c[0], V[0], N); for(i = 1; i < mp1; i++) { assign_add_mul(r0, V[i], c[i], N); } if(g_debug_level > 3) { tmp1.im=sqrt(square_norm(r0, N, 1)); if(g_proc_id == g_stdio_proc){ printf("%d\t%e residue\n", m*restart, tmp1.im*tmp1.im); fflush(stdout); } } /* Stop if satisfied */ if(err < eps){ assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(restart*m); } /* Prepare to compute harmonic Ritz pairs */ for(i = 0; i < m-1; i++){ alpha[i].re = 0.; alpha[i].im = 0.; } alpha[m-1].re = 1.; alpha[m-1].im = 0.; _FT(zgesv) (&_m, &one, work[0], &mp1, idx, alpha, &_m, &info); for(i = 0; i < m; i++) { G[m-1][i].re += (beta2*alpha[idx[i]-1].re); G[m-1][i].im += (beta2*alpha[idx[i]-1].im); } if(g_proc_id == 0 && g_debug_level > 3){ printf("zgesv returned info = %d, c[m-1]= %e, %e , idx[m-1]=%d\n", info, alpha[idx[m-1]-1].re, alpha[idx[m-1]-1].im, idx[m-1]); } /* c - \bar H * d -> c */ /* G contains H + \beta^2 H^-He_n e_n^H */ /* Compute harmonic Ritz pairs */ diagonalise_general_matrix(m, G[0], mp1, alpha, evalues); for(i = 0; i < m; i++) { sortarray[i] = _complex_square_norm(evalues[i]); idx[i] = i; } quicksort(m, sortarray, idx); if(g_proc_id == g_stdio_proc && g_debug_level > 1) { for(i = 0; i < m; i++) { printf("# Evalues %d %e %e \n", i, evalues[idx[i]].re, evalues[idx[i]].im); } fflush(stdout); } /* Copy the first nr_ev eigenvectors to work */ for(i = 0; i < ne; i++) { for(l = 0; l < m; l++) { work[i][l] = G[idx[i]][l]; } } /* Orthonormalize them */ for(i = 0; i < ne; i++) { work[i][m].re = 0.; work[i][m].im = 0.; short_ModifiedGS(work[i], m, i, work[0], mp1); } /* Orthonormalize c - \bar H d to work */ short_ModifiedGS(c, m+1, ne, work[0], mp1); for(i = 0; i < mp1; i++) { work[nr_ev][i] = c[i]; } /* Now compute \bar H = P^T_k+1 \bar H_m P_k */ for(i = 0; i < mp1; i++) { for(l = 0; l < mp1; l++) { H[i][l].re = 0.; H[i][l].im = 0.; } } _FT(zgemm) ("N", "N", &mp1, &ne, &_m, &cpone, work2[0], &mp1, work[0], &mp1, &czero, G[0], &mp1, 1, 1); _FT(zgemm) ("C", "N", &np1, &ne , &mp1, &cpone, work[0], &mp1, G[0], &mp1, &czero, H[0], &mp1, 1, 1); if(g_debug_level > 3) { for(i = 0; i < ne+1; i++) { for(l = 0; l < ne+1; l++) { if(g_proc_id == 0) { printf("(g[%d], g[%d]) = %e, %e\n", i, l, short_scalar_prod(work[i], work[l], m+1).re, short_scalar_prod(work[i], work[l], m+1).im); printf("(g[%d], g[%d]) = %e, %e\n", l, i, short_scalar_prod(work[l], work[i], m+1).re, short_scalar_prod(work[l], work[i], m+1).im); } } } } /* V_k+1 = V_m+1 P_k+1 */ /* _FT(zgemm) ("N", "N", &_N, &np1, &mp1, &cpone, (complex*)V[0], &V2, work[0], &mp1, &czero, (complex*)Z[0], &V2, 1, 1); */ for(l = 0; l < np1; l++) { mul(Z[l], work[l][0], V[0], N); for(i = 1; i < mp1; i++) { assign_add_mul(Z[l], V[i], work[l][i], N); } } /* copy back to V */ for(i = 0; i < np1; i++) { assign(V[i], Z[i], N); } /* Reorthogonalise v_nr_ev */ ModifiedGS((complex*)V[nr_ev], _N, nr_ev, (complex*)V[0], V2); if(g_debug_level > 3) { for(i = 0; i < np1; i++) { for(l = 0; l < np1; l++) { tmp1 = scalar_prod(V[l], V[i], N, 1); if(g_proc_id == 0) { printf("(V[%d], V[%d]) = %e %e %d %d %d %d %d %d %e %e\n", l, i, tmp1.re, tmp1.im, np1, mp1, ne, _m, _N, V2, H[l][i].re, H[l][i].im); } } } } /* Copy the content of H to work, work2 and G */ for(i=0; i < mp1; i++) { for(l = 0; l < mp1; l++) { G[i][l] = H[i][l]; work2[i][l] = H[i][l]; work[l][i].re = H[i][l].re; work[l][i].im = -H[i][l].im; } } for(j = ne; j < m; j++) { /* solver_field[0]=A*v_j */ f(solver_field[1], V[j]); /* Set h_ij and omega_j */ /* solver_field[1] <- omega_j */ /* assign(solver_field[1], solver_field[0], N); */ for(i = 0; i <= j; i++){ H[j][i] = scalar_prod(V[i], solver_field[1], N, 1); /* H, G, work and work2 are now all in Fortran storage: columns first */ G[j][i] = H[j][i]; work2[j][i] = H[j][i]; work[i][j].re = H[j][i].re; work[i][j].im = -H[j][i].im; assign_diff_mul(solver_field[1], V[i], H[j][i], N); } beta2 = square_norm(solver_field[1], N, 1); _complex_set(H[j][j+1], sqrt(beta2), 0.); G[j][j+1] = H[j][j+1]; work2[j][j+1] = H[j][j+1]; work[j+1][j].re = H[j][j+1].re; work[j+1][j].im = -H[j][j+1].im; mul_r(V[(j+1)], 1./H[j][j+1].re, solver_field[1], N); } /* Solve the least square problem for alpha*/ /* This produces c=V_m+1*r0 */ for(i = 0; i < mp1; i++) { c[i] = scalar_prod(V[i], r0, N, 1); alpha[i] = c[i]; if(g_proc_id == 0 && g_debug_level > 3) { printf("c: %e %e err = %e\n", c[i].re, c[i].im, err); } } if(lswork == NULL) { lwork = -1; _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, &tmp1, &lwork, &info, 1); lwork = (int)tmp1.re; lswork = (complex*)malloc(lwork*sizeof(complex)); } _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, lswork, &lwork, &info, 1); if(g_proc_id == 0 && g_debug_level > 3) { printf("zgels returned info = %d\n", info); fflush(stdout); } /* Compute the new solution vector */ for(i = 0; i < m; i++){ if(g_proc_id == 0 && g_debug_level > 3) { printf("alpha: %e %e\n", alpha[i].re, alpha[i].im); } assign_add_mul(x0, V[i], alpha[i], N); } } /* If maximal number of restart is reached */ assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(-1); }