PetscErrorCode DSUpdateExtraRow_NHEP(DS ds) { PetscErrorCode ierr; PetscInt i; PetscBLASInt n,ld,incx=1; PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); A = ds->mat[DS_MAT_A]; Q = ds->mat[DS_MAT_Q]; ierr = DSAllocateWork_Private(ds,2*ld,0,0);CHKERRQ(ierr); x = ds->work; y = ds->work+ld; for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]); PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx)); for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]); ds->k = n; PetscFunctionReturn(0); }
PetscErrorCode VecSquare(Vec v) { PetscErrorCode ierr; PetscScalar *x; PetscInt i, n; PetscFunctionBegin; ierr = VecGetLocalSize(v, &n);CHKERRQ(ierr); ierr = VecGetArray(v, &x);CHKERRQ(ierr); for (i = 0; i < n; i++) x[i] *= PetscConj(x[i]); ierr = VecRestoreArray(v, &x);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode VecConjugate_Seq(Vec xin) { PetscScalar *x; PetscInt n = xin->map->n; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecGetArray(xin,&x);CHKERRQ(ierr); while (n-->0) { *x = PetscConj(*x); x++; } ierr = VecRestoreArray(xin,&x);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* Custom CGS orthogonalization, preprocess after first orthogonalization */ static PetscErrorCode SVDOrthogonalizeCGS(BV V,PetscInt i,PetscScalar* h,PetscReal a,BVOrthogRefineType refine,PetscReal eta,PetscReal *norm) { PetscErrorCode ierr; PetscReal sum,onorm; PetscScalar dot; PetscInt j; PetscFunctionBegin; switch (refine) { case BV_ORTHOG_REFINE_NEVER: ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr); break; case BV_ORTHOG_REFINE_ALWAYS: ierr = BVSetActiveColumns(V,0,i);CHKERRQ(ierr); ierr = BVDotColumn(V,i,h);CHKERRQ(ierr); ierr = BVMultColumn(V,-1.0,1.0,i,h);CHKERRQ(ierr); ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr); break; case BV_ORTHOG_REFINE_IFNEEDED: dot = h[i]; onorm = PetscSqrtReal(PetscRealPart(dot)) / a; sum = 0.0; for (j=0;j<i;j++) { sum += PetscRealPart(h[j] * PetscConj(h[j])); } *norm = PetscRealPart(dot)/(a*a) - sum; if (*norm>0.0) *norm = PetscSqrtReal(*norm); else { ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr); } if (*norm < eta*onorm) { ierr = BVSetActiveColumns(V,0,i);CHKERRQ(ierr); ierr = BVDotColumn(V,i,h);CHKERRQ(ierr); ierr = BVMultColumn(V,-1.0,1.0,i,h);CHKERRQ(ierr); ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr); } break; } PetscFunctionReturn(0); }
static PetscErrorCode getSumSquares(Mat matrix, double *diag) { PetscErrorCode ierr; int i, j; double *sumr, *sumc; PetscInt n, mLocal, nLocal, low, high; PetscReal *aux; PetscFunctionBegin; ierr = MatGetSize(matrix, NULL, &n); CHKERRQ(ierr); ierr = MatGetLocalSize(matrix, &mLocal, &nLocal); CHKERRQ(ierr); sumr = diag; sumc = &diag[mLocal]; ierr = PetscMalloc1(n, &aux); CHKERRQ(ierr); ierr = MatGetColumnNorms(matrix, NORM_2, aux); CHKERRQ(ierr); ierr = MatGetOwnershipRangeColumn(matrix, &low, &high);CHKERRQ(ierr); for (i=low; i<high; i++) { sumc[i-low] = aux[i]*aux[i]; } ierr = PetscFree(aux); CHKERRQ(ierr); ierr = MatGetOwnershipRange(matrix, &low, &high); CHKERRQ(ierr); for (i=low; i<high; i++) { PetscInt ncols; const PetscInt *cols; const PetscScalar *vals; sumr[i-low] = 0.0; ierr = MatGetRow(matrix, i, &ncols, &cols, &vals); CHKERRQ(ierr); for (j = 0; j < ncols; j++) { sumr[i-low] += PetscRealPart(vals[j]*PetscConj(vals[j])); } ierr = MatRestoreRow(matrix, i, &ncols, &cols, &vals); CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL*) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscBool bUpdateX; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscScalar utb; PetscReal max_s, pinv_tol; PetscErrorCode ierr; PetscFunctionBegin; /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); else if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); } } /* Polynomial part */ for (i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr); } /* Symmetrize MZa */ for (i = 0; i <= bcgsl->ell; ++i) { for (j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell; ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr); AY0c[0] = -1; if (bcgsl->pinv) { #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable."); #else # if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr)); # else PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr)); # endif #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } /* Apply pseudo-inverse */ max_s = bcgsl->s[0]; for (i=1; i<bell; i++) { if (bcgsl->s[i] > max_s) { max_s = bcgsl->s[i]; } } /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */ pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON; ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0; i<bell; i++) { if (bcgsl->s[i] >= pinv_tol) { utb=0.; for (j=0; j<bell; j++) { utb += MZb[1+j]*bcgsl->u[i*bell+j]; } for (j=0; j<bell; j++) { AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i]; } } } } else { #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); } } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs; ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr)); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione)); kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione)); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = PetscSqrtReal(kappa0); kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione)); kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); if (kappa1<0) kappa1 = -kappa1; kappa1 = PetscSqrtReal(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h]; if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_IBCGS(KSP ksp) { PetscErrorCode ierr; PetscInt i,N; PetscReal rnorm,rnormin = 0.0; #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithematic rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits) and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE. Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double precision into a 16 byte space (the rest of the space is ignored) */ long double insums[7],outsums[7]; #else PetscScalar insums[7],outsums[7]; #endif PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2; PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan; const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un; PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn; /* the rest do not have to keep n_1 values */ PetscScalar kappan, thetan, etan, gamman, betan, deltan; const PetscScalar *PETSC_RESTRICT tn; PetscScalar *PETSC_RESTRICT sn; Vec R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un; Mat A; PetscFunctionBegin; if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors"); ierr = PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr); Xn = ksp->vec_sol;ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Xn_1,PETSC_NULL);CHKERRQ(ierr); B = ksp->vec_rhs;ierr = VecGetArrayRead(B,(const PetscScalar**)&b);ierr = VecRestoreArrayRead(B,PETSC_NULL);CHKERRQ(ierr); R0 = ksp->work[0];ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(R0,PETSC_NULL);CHKERRQ(ierr); Rn = ksp->work[1];ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Rn_1,PETSC_NULL);CHKERRQ(ierr); Un = ksp->work[2];ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Un_1,PETSC_NULL);CHKERRQ(ierr); F0 = ksp->work[3];ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(F0,PETSC_NULL);CHKERRQ(ierr); Vn = ksp->work[4];ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Vn_1,PETSC_NULL);CHKERRQ(ierr); Zn = ksp->work[5];ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Zn_1,PETSC_NULL);CHKERRQ(ierr); Qn = ksp->work[6];ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Qn_1,PETSC_NULL);CHKERRQ(ierr); Tn = ksp->work[7];ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Tn,PETSC_NULL);CHKERRQ(ierr); Sn = ksp->work[8];ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Sn,PETSC_NULL);CHKERRQ(ierr); /* r0 = rn_1 = b - A*xn_1; */ /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr); ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */ ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr); ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr); /* un_1 = A*rn_1; */ ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr); /* f0 = A'*rn_1; */ if (ksp->pc_side == PC_RIGHT) { /* B' A' */ ierr = MatMultTranspose(A,R0,Tn);CHKERRQ(ierr); ierr = PCApplyTranspose(ksp->pc,Tn,F0);CHKERRQ(ierr); } else if (ksp->pc_side == PC_LEFT) { /* A' B' */ ierr = PCApplyTranspose(ksp->pc,R0,Tn);CHKERRQ(ierr); ierr = MatMultTranspose(A,Tn,F0);CHKERRQ(ierr); } /*qn_1 = vn_1 = zn_1 = 0.0; */ ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr); ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr); sigman_2 = pin_1 = taun_1 = 0.0; /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */ ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); /* sigman_1 = rn_1'un_1 */ ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); alphan_1 = omegan_1 = 1.0; for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) { rhon = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1; /* if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */ if (ksp->its == 1) deltan = rhon; else deltan = rhon/taun_1; betan = deltan/omegan_1; taun = sigman_1 + betan*taun_1 - deltan*pin_1; if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its); alphan = rhon/taun; ierr = PetscLogFlops(15.0); /* zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1 vn = un_1 + betan*vn_1 - deltan*qn_1 sn = rn_1 - alphan*vn The algorithm in the paper is missing the alphan/alphan_1 term in the zn update */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); tmp1 = (alphan/alphan_1)*betan; tmp2 = alphan*deltan; for (i=0; i<N; i++) { zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i]; vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i]; sn[i] = rn_1[i] - alphan*vn[i]; } ierr = PetscLogFlops(3.0+11.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); /* qn = A*vn */ ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr); /* tn = un_1 - alphan*qn */ ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr); /* phin = r0'sn pin = r0'qn gamman = f0'sn etan = f0'tn thetan = sn'tn kappan = tn'tn */ ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); phin = pin = gamman = etan = thetan = kappan = 0.0; for (i=0; i<N; i++) { phin += r0[i]*sn[i]; pin += r0[i]*qn[i]; gamman += f0[i]*sn[i]; etan += f0[i]*tn[i]; thetan += sn[i]*tn[i]; kappan += tn[i]*tn[i]; } ierr = PetscLogFlops(12.0*N); ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr); insums[0] = phin; insums[1] = pin; insums[2] = gamman; insums[3] = etan; insums[4] = thetan; insums[5] = kappan; insums[6] = rnormin; ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #else if (ksp->lagnorm && ksp->its > 1) { ierr = MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } else { ierr = MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); } #endif ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); phin = outsums[0]; pin = outsums[1]; gamman = outsums[2]; etan = outsums[3]; thetan = outsums[4]; kappan = outsums[5]; if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6])); if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its); if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its); omegan = thetan/kappan; sigman = gamman - omegan*etan; /* rn = sn - omegan*tn xn = xn_1 + zn + omegan*sn */ ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr); rnormin = 0.0; for (i=0; i<N; i++) { rn[i] = sn[i] - omegan*tn[i]; rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]); xn[i] += zn[i] + omegan*sn[i]; } ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr); ierr = PetscLogFlops(7.0*N); ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr); if (!ksp->lagnorm && ksp->chknorm < ksp->its) { ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr); ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr); rnorm = PetscSqrtReal(rnorm); } /* Test for convergence */ ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; /* un = A*rn */ ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr); /* Update n-1 locations with n locations */ sigman_2 = sigman_1; sigman_1 = sigman; pin_1 = pin; phin_1 = phin; alphan_1 = alphan; taun_1 = taun; omegan_1 = omegan; } if (ksp->its >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int Argc,char **Args) { PetscBool flg; PetscInt n = -6; PetscScalar rho = 1.0; PetscReal h; PetscReal beta = 1.0; DM da; PetscRandom rctx; PetscMPIInt comm_size; Mat H,HtH; PetscInt x, y, xs, ys, xm, ym; PetscReal r1, r2; PetscScalar uxy1, uxy2; MatStencil sxy, sxy_m; PetscScalar val, valconj; Vec b, Htb,xvec; KSP kspmg; PC pcmg; PetscErrorCode ierr; PetscInt ix[1] = {0}; PetscScalar vals[1] = {1.0}; PetscInitialize(&Argc,&Args,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-size",&n,&flg);CHKERRQ(ierr); ierr = PetscOptionsGetReal(NULL,"-beta",&beta,&flg);CHKERRQ(ierr); ierr = PetscOptionsGetScalar(NULL,"-rho",&rho,&flg);CHKERRQ(ierr); /* Set the fudge parameters, we scale the whole thing by 1/(2*h) later */ h = 1.; rho *= 1./(2.*h); /* Geometry info */ ierr = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC, DMDA_STENCIL_STAR, n, n, PETSC_DECIDE, PETSC_DECIDE, 2 /* this is the # of dof's */, 1, NULL, NULL, &da);CHKERRQ(ierr); /* Random numbers */ ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); /* Single or multi processor ? */ ierr = MPI_Comm_size(PETSC_COMM_WORLD,&comm_size);CHKERRQ(ierr); /* construct matrix */ ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr); ierr = DMCreateMatrix(da, &H);CHKERRQ(ierr); /* get local corners for this processor */ ierr = DMDAGetCorners(da,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr); /* Assemble the matrix */ for (x=xs; x<xs+xm; x++) { for (y=ys; y<ys+ym; y++) { /* each lattice point sets only the *forward* pointing parameters (right, down), i.e. Nabla_1^+ and Nabla_2^+. In this way we can use only local random number creation. That means we also have to set the corresponding backward pointing entries. */ /* Compute some normally distributed random numbers via Box-Muller */ ierr = PetscRandomGetValueReal(rctx, &r1);CHKERRQ(ierr); r1 = 1.-r1; /* to change from [0,1) to (0,1], which we need for the log */ ierr = PetscRandomGetValueReal(rctx, &r2);CHKERRQ(ierr); PetscReal R = PetscSqrtReal(-2.*PetscLogReal(r1)); PetscReal c = PetscCosReal(2.*PETSC_PI*r2); PetscReal s = PetscSinReal(2.*PETSC_PI*r2); /* use those to set the field */ uxy1 = PetscExpScalar(((PetscScalar) (R*c/beta))*PETSC_i); uxy2 = PetscExpScalar(((PetscScalar) (R*s/beta))*PETSC_i); sxy.i = x; sxy.j = y; /* the point where we are */ /* center action */ sxy.c = 0; /* spin 0, 0 */ ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy, &rho, ADD_VALUES);CHKERRQ(ierr); sxy.c = 1; /* spin 1, 1 */ val = -rho; ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); sxy_m.i = x+1; sxy_m.j = y; /* right action */ sxy.c = 0; sxy_m.c = 0; /* spin 0, 0 */ val = -uxy1; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy.c = 0; sxy_m.c = 1; /* spin 0, 1 */ val = -uxy1; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy.c = 1; sxy_m.c = 0; /* spin 1, 0 */ val = uxy1; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy.c = 1; sxy_m.c = 1; /* spin 1, 1 */ val = uxy1; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy_m.i = x; sxy_m.j = y+1; /* down action */ sxy.c = 0; sxy_m.c = 0; /* spin 0, 0 */ val = -uxy2; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy.c = 0; sxy_m.c = 1; /* spin 0, 1 */ val = -PETSC_i*uxy2; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy.c = 1; sxy_m.c = 0; /* spin 1, 0 */ val = -PETSC_i*uxy2; valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); sxy.c = 1; sxy_m.c = 1; /* spin 1, 1 */ val = PetscConj(uxy2); valconj = PetscConj(val); ierr = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(H, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(H, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* scale H */ ierr = MatScale(H, 1./(2.*h));CHKERRQ(ierr); /* it looks like H is Hermetian */ /* construct normal equations */ ierr = MatMatMult(H, H, MAT_INITIAL_MATRIX, 1., &HtH);CHKERRQ(ierr); /* permutation matrix to check whether H and HtH are identical to the ones in the paper */ /* Mat perm; */ /* ierr = DMCreateMatrix(da, &perm);CHKERRQ(ierr); */ /* PetscInt row, col; */ /* PetscScalar one = 1.0; */ /* for (PetscInt i=0; i<n; i++) { */ /* for (PetscInt j=0; j<n; j++) { */ /* row = (i*n+j)*2; col = i*n+j; */ /* ierr = MatSetValues(perm, 1, &row, 1, &col, &one, INSERT_VALUES);CHKERRQ(ierr); */ /* row = (i*n+j)*2+1; col = i*n+j + n*n; */ /* ierr = MatSetValues(perm, 1, &row, 1, &col, &one, INSERT_VALUES);CHKERRQ(ierr); */ /* } */ /* } */ /* ierr = MatAssemblyBegin(perm, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); */ /* ierr = MatAssemblyEnd(perm, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); */ /* Mat Hperm; */ /* ierr = MatPtAP(H, perm, MAT_INITIAL_MATRIX, 1.0, &Hperm);CHKERRQ(ierr); */ /* ierr = PetscPrintf(PETSC_COMM_WORLD, "Matrix H after construction\n");CHKERRQ(ierr); */ /* ierr = MatView(Hperm, PETSC_VIEWER_STDOUT_(PETSC_COMM_WORLD));CHKERRQ(ierr); */ /* Mat HtHperm; */ /* ierr = MatPtAP(HtH, perm, MAT_INITIAL_MATRIX, 1.0, &HtHperm);CHKERRQ(ierr); */ /* ierr = PetscPrintf(PETSC_COMM_WORLD, "Matrix HtH:\n");CHKERRQ(ierr); */ /* ierr = MatView(HtHperm, PETSC_VIEWER_STDOUT_(PETSC_COMM_WORLD));CHKERRQ(ierr); */ /* right hand side */ ierr = DMCreateGlobalVector(da, &b);CHKERRQ(ierr); ierr = VecSet(b,0.0);CHKERRQ(ierr); ierr = VecSetValues(b, 1, ix, vals, INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(b);CHKERRQ(ierr); ierr = VecAssemblyEnd(b);CHKERRQ(ierr); /* ierr = VecSetRandom(b, rctx);CHKERRQ(ierr); */ ierr = VecDuplicate(b, &Htb);CHKERRQ(ierr); ierr = MatMultTranspose(H, b, Htb);CHKERRQ(ierr); /* construct solver */ ierr = KSPCreate(PETSC_COMM_WORLD,&kspmg);CHKERRQ(ierr); ierr = KSPSetType(kspmg, KSPCG);CHKERRQ(ierr); ierr = KSPGetPC(kspmg,&pcmg);CHKERRQ(ierr); ierr = PCSetType(pcmg,PCASA);CHKERRQ(ierr); /* maybe user wants to override some of the choices */ ierr = KSPSetFromOptions(kspmg);CHKERRQ(ierr); ierr = KSPSetOperators(kspmg, HtH, HtH, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = DMDASetRefinementFactor(da, 3, 3, 3);CHKERRQ(ierr); ierr = PCSetDM(pcmg,da);CHKERRQ(ierr); ierr = PCASASetTolerances(pcmg, 1.e-6, 1.e-10,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr); ierr = VecDuplicate(b, &xvec);CHKERRQ(ierr); ierr = VecSet(xvec, 0.0);CHKERRQ(ierr); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve the linear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ ierr = KSPSolve(kspmg, Htb, xvec);CHKERRQ(ierr); /* ierr = VecView(xvec, PETSC_VIEWER_STDOUT_(PETSC_COMM_WORLD));CHKERRQ(ierr); */ ierr = KSPDestroy(&kspmg);CHKERRQ(ierr); ierr = VecDestroy(&xvec);CHKERRQ(ierr); /* seems to be destroyed by KSPDestroy */ ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&Htb);CHKERRQ(ierr); ierr = MatDestroy(&HtH);CHKERRQ(ierr); ierr = MatDestroy(&H);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
PetscErrorCode KSPSolve_BiCG(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscBool diagonalscale; PetscScalar dpi,a=1.0,beta,betaold=1.0,b,ma; PetscReal dp; Vec X,B,Zl,Zr,Rl,Rr,Pl,Pr; Mat Amat,Pmat; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); X = ksp->vec_sol; B = ksp->vec_rhs; Rl = ksp->work[0]; Zl = ksp->work[1]; Pl = ksp->work[2]; Rr = ksp->work[3]; Zr = ksp->work[4]; Pr = ksp->work[5]; ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,Rr);CHKERRQ(ierr); /* r <- b - Ax */ ierr = VecAYPX(Rr,-1.0,B);CHKERRQ(ierr); } else { ierr = VecCopy(B,Rr);CHKERRQ(ierr); /* r <- b (x is 0) */ } ierr = VecCopy(Rr,Rl);CHKERRQ(ierr); ierr = KSP_PCApply(ksp,Rr,Zr);CHKERRQ(ierr); /* z <- Br */ ierr = VecConjugate(Rl);CHKERRQ(ierr); ierr = KSP_PCApplyTranspose(ksp,Rl,Zl);CHKERRQ(ierr); ierr = VecConjugate(Rl);CHKERRQ(ierr); ierr = VecConjugate(Zl);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNorm(Zr,NORM_2,&dp);CHKERRQ(ierr); /* dp <- z'*z */ } else { ierr = VecNorm(Rr,NORM_2,&dp);CHKERRQ(ierr); /* dp <- r'*r */ } ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); i = 0; do { ierr = VecDot(Zr,Rl,&beta);CHKERRQ(ierr); /* beta <- r'z */ if (!i) { if (beta == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } ierr = VecCopy(Zr,Pr);CHKERRQ(ierr); /* p <- z */ ierr = VecCopy(Zl,Pl);CHKERRQ(ierr); } else { b = beta/betaold; ierr = VecAYPX(Pr,b,Zr);CHKERRQ(ierr); /* p <- z + b* p */ b = PetscConj(b); ierr = VecAYPX(Pl,b,Zl);CHKERRQ(ierr); } betaold = beta; ierr = KSP_MatMult(ksp,Amat,Pr,Zr);CHKERRQ(ierr); /* z <- Kp */ ierr = VecConjugate(Pl);CHKERRQ(ierr); ierr = KSP_MatMultTranspose(ksp,Amat,Pl,Zl);CHKERRQ(ierr); ierr = VecConjugate(Pl);CHKERRQ(ierr); ierr = VecConjugate(Zl);CHKERRQ(ierr); ierr = VecDot(Zr,Pl,&dpi);CHKERRQ(ierr); /* dpi <- z'p */ a = beta/dpi; /* a = beta/p'z */ ierr = VecAXPY(X,a,Pr);CHKERRQ(ierr); /* x <- x + ap */ ma = -a; ierr = VecAXPY(Rr,ma,Zr);CHKERRQ(ierr); ma = PetscConj(ma); ierr = VecAXPY(Rl,ma,Zl);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = KSP_PCApply(ksp,Rr,Zr);CHKERRQ(ierr); /* z <- Br */ ierr = VecConjugate(Rl);CHKERRQ(ierr); ierr = KSP_PCApplyTranspose(ksp,Rl,Zl);CHKERRQ(ierr); ierr = VecConjugate(Rl);CHKERRQ(ierr); ierr = VecConjugate(Zl);CHKERRQ(ierr); ierr = VecNorm(Zr,NORM_2,&dp);CHKERRQ(ierr); /* dp <- z'*z */ } else { ierr = VecNorm(Rr,NORM_2,&dp);CHKERRQ(ierr); /* dp <- r'*r */ } ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = i+1; ksp->rnorm = dp; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = KSP_PCApply(ksp,Rr,Zr);CHKERRQ(ierr); /* z <- Br */ ierr = VecConjugate(Rl);CHKERRQ(ierr); ierr = KSP_PCApplyTranspose(ksp,Rl,Zl);CHKERRQ(ierr); ierr = VecConjugate(Rl);CHKERRQ(ierr); ierr = VecConjugate(Zl);CHKERRQ(ierr); } i++; } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z) { PetscErrorCode ierr; PetscInt n = xin->map->n,i,j,nv_rem,j_rem; PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3; const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase; Vec *yy; PetscFunctionBegin; sum0 = 0.; sum1 = 0.; sum2 = 0.; i = nv; nv_rem = nv&0x3; yy = (Vec *)yin; j = n; ierr = VecGetArrayRead(xin,&xbase);CHKERRQ(ierr); x = xbase; switch (nv_rem) { case 3: ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr); ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr); ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr); switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); sum2 += x2*PetscConj(yy2[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); sum2 += x1*PetscConj(yy2[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); sum2 += x0*PetscConj(yy2[0]); case 0: x += j_rem; yy0 += j_rem; yy1 += j_rem; yy2 += j_rem; j -= j_rem; break; } while (j>0) { x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x += 4; sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4; sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4; sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4; j -= 4; } z[0] = sum0; z[1] = sum1; z[2] = sum2; ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr); ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr); ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr); break; case 2: ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr); ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr); switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); case 0: x += j_rem; yy0 += j_rem; yy1 += j_rem; j -= j_rem; break; } while (j>0) { x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x += 4; sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4; sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4; j -= 4; } z[0] = sum0; z[1] = sum1; ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr); ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr); break; case 1: ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr); switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); case 0: x += j_rem; yy0 += j_rem; j -= j_rem; break; } while (j>0) { sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1]) + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]); yy0+=4; j -= 4; x+=4; } z[0] = sum0; ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr); break; case 0: break; } z += nv_rem; i -= nv_rem; yy += nv_rem; while (i >0) { sum0 = 0.; sum1 = 0.; sum2 = 0.; sum3 = 0.; ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr); ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr); ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr); ierr = VecGetArrayRead(yy[3],&yy3);CHKERRQ(ierr); j = n; x = xbase; switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]); case 0: x += j_rem; yy0 += j_rem; yy1 += j_rem; yy2 += j_rem; yy3 += j_rem; j -= j_rem; break; } while (j>0) { x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x += 4; sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4; sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4; sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4; sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4; j -= 4; } z[0] = sum0; z[1] = sum1; z[2] = sum2; z[3] = sum3; z += 4; i -= 4; ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr); ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr); ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr); ierr = VecRestoreArrayRead(yy[3],&yy3);CHKERRQ(ierr); yy += 4; } ierr = VecRestoreArrayRead(xin,&xbase);CHKERRQ(ierr); ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat, epsilon, abstol; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscTruth bUpdateX; PetscTruth bBombed = PETSC_FALSE; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscErrorCode ierr; PetscFunctionBegin; if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL"); if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL"); if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL"); /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ldMZ = PetscBLASIntCast(ell+1); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0); CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0); CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB); CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT); CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit); CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; KSPLogResidualHistory(ksp, zeta); KSPMonitor(ksp, ksp->its, zeta); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1); CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]); CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM); CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma); CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]); CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]); CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0); CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); break; } } if (bBombed==PETSC_TRUE) break; /* Polynomial part */ for(i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]); CHKERRQ(ierr); } /* Symmetrize MZa */ for(i = 0; i <= bcgsl->ell; ++i) { for(j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar)); CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell); AY0c[0] = -1; LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1); LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione); kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = sqrt(kappa0); kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione); kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); if (kappa1<0) kappa1 = -kappa1; kappa1 = sqrt(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) { omega = AY0c[h]; } if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1); CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecNorm(VVR[0], NORM_2, &zeta); CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM); CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB); CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR); CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
PetscErrorCode SNESMonitorVI(SNES snes,PetscInt its,PetscReal fgnorm,void *dummy) { PetscErrorCode ierr; PetscViewer viewer = dummy ? (PetscViewer) dummy : PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)snes)); const PetscScalar *x,*xl,*xu,*f; PetscInt i,n,act[2] = {0,0},fact[2],N; /* Number of components that actually hit the bounds (c.f. active variables) */ PetscInt act_bound[2] = {0,0},fact_bound[2]; PetscReal rnorm,fnorm; double tmp; PetscFunctionBegin; ierr = VecGetLocalSize(snes->vec_sol,&n);CHKERRQ(ierr); ierr = VecGetSize(snes->vec_sol,&N);CHKERRQ(ierr); ierr = VecGetArrayRead(snes->xl,&xl);CHKERRQ(ierr); ierr = VecGetArrayRead(snes->xu,&xu);CHKERRQ(ierr); ierr = VecGetArrayRead(snes->vec_sol,&x);CHKERRQ(ierr); ierr = VecGetArrayRead(snes->vec_func,&f);CHKERRQ(ierr); rnorm = 0.0; for (i=0; i<n; i++) { if (((PetscRealPart(x[i]) > PetscRealPart(xl[i]) + 1.e-8 || (PetscRealPart(f[i]) < 0.0)) && ((PetscRealPart(x[i]) < PetscRealPart(xu[i]) - 1.e-8) || PetscRealPart(f[i]) > 0.0))) rnorm += PetscRealPart(PetscConj(f[i])*f[i]); else if (PetscRealPart(x[i]) <= PetscRealPart(xl[i]) + 1.e-8 && PetscRealPart(f[i]) >= 0.0) act[0]++; else if (PetscRealPart(x[i]) >= PetscRealPart(xu[i]) - 1.e-8 && PetscRealPart(f[i]) <= 0.0) act[1]++; else SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_PLIB,"Can never get here"); } for (i=0; i<n; i++) { if (PetscRealPart(x[i]) <= PetscRealPart(xl[i]) + 1.e-8) act_bound[0]++; else if (PetscRealPart(x[i]) >= PetscRealPart(xu[i]) - 1.e-8) act_bound[1]++; } ierr = VecRestoreArrayRead(snes->vec_func,&f);CHKERRQ(ierr); ierr = VecRestoreArrayRead(snes->xl,&xl);CHKERRQ(ierr); ierr = VecRestoreArrayRead(snes->xu,&xu);CHKERRQ(ierr); ierr = VecRestoreArrayRead(snes->vec_sol,&x);CHKERRQ(ierr); ierr = MPI_Allreduce(&rnorm,&fnorm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr); ierr = MPI_Allreduce(act,fact,2,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr); ierr = MPI_Allreduce(act_bound,fact_bound,2,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr); fnorm = PetscSqrtReal(fnorm); ierr = PetscViewerASCIIAddTab(viewer,((PetscObject)snes)->tablevel);CHKERRQ(ierr); if (snes->ntruebounds) tmp = ((double)(fact[0]+fact[1]))/((double)snes->ntruebounds); else tmp = 0.0; ierr = PetscViewerASCIIPrintf(viewer,"%3D SNES VI Function norm %14.12e Active lower constraints %D/%D upper constraints %D/%D Percent of total %g Percent of bounded %g\n",its,(double)fnorm,fact[0],fact_bound[0],fact[1],fact_bound[1],((double)(fact[0]+fact[1]))/((double)N),tmp);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(viewer,((PetscObject)snes)->tablevel);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode SNESVIComputeInactiveSetFnorm(SNES snes,Vec F,Vec X, PetscReal *fnorm) { PetscErrorCode ierr; const PetscScalar *x,*xl,*xu,*f; PetscInt i,n; PetscReal rnorm; PetscFunctionBegin; ierr = VecGetLocalSize(X,&n);CHKERRQ(ierr); ierr = VecGetArrayRead(snes->xl,&xl);CHKERRQ(ierr); ierr = VecGetArrayRead(snes->xu,&xu);CHKERRQ(ierr); ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr); ierr = VecGetArrayRead(F,&f);CHKERRQ(ierr); rnorm = 0.0; for (i=0; i<n; i++) { if (((PetscRealPart(x[i]) > PetscRealPart(xl[i]) + 1.e-8 || (PetscRealPart(f[i]) < 0.0)) && ((PetscRealPart(x[i]) < PetscRealPart(xu[i]) - 1.e-8) || PetscRealPart(f[i]) > 0.0))) rnorm += PetscRealPart(PetscConj(f[i])*f[i]); } ierr = VecRestoreArrayRead(F,&f);CHKERRQ(ierr); ierr = VecRestoreArrayRead(snes->xl,&xl);CHKERRQ(ierr); ierr = VecRestoreArrayRead(snes->xu,&xu);CHKERRQ(ierr); ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr); ierr = MPI_Allreduce(&rnorm,fnorm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr); *fnorm = PetscSqrtReal(*fnorm); PetscFunctionReturn(0); }
PetscErrorCode cHamiltonianMatrix::measurement(){ double *ALLdepart = new double[Nt]; double *ALLentropy = new double[Nt]; gsl_matrix* density1 = gsl_matrix_alloc(L,Nt);//background fermion density gsl_matrix* density2 = gsl_matrix_alloc(L,Nt);//background fermion density gsl_vector* corr12 = gsl_vector_alloc(Nt);//correlation betwen fermion up and down. // The density correlation is in fact proportional to the interacting energy. double var_rank; PetscScalar var_tmp, var_tmp2; gsl_complex var_tmp_gsl; Vec vectort; VecScatter ctx; ofstream output; VecScatterCreateToZero(WFt[0],&ctx,&vectort); if(rank==0) cout << size << endl; gsl_matrix_complex* RDM = gsl_matrix_complex_alloc(dim2,dim2); gsl_vector *eval_RDM = gsl_vector_alloc(dim2); gsl_eigen_herm_workspace* w_RDM = gsl_eigen_herm_alloc(dim2); for (int itime = 0; itime < Nt; ++itime) { if (rank==0&&itime%10==0) cout << "this is time " << itime << endl; // % ## departure ## var_rank = 0.0; for (int ivar = rstart; ivar < rend; ++ivar) { ierr = VecGetValues(WFt[itime],1,&ivar,&var_tmp);CHKERRQ(ierr); var_rank += pow(gsl_vector_get(rr,ivar)*PetscAbsComplex(var_tmp),2); } MPI_Reduce(&var_rank, &(ALLdepart[itime]), 1, MPI_DOUBLE, MPI_SUM, 0, PETSC_COMM_WORLD); ALLdepart[itime] = sqrt(ALLdepart[itime]); // % ## entropy ## VecScatterBegin(ctx,WFt[itime],vectort,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(ctx,WFt[itime],vectort,INSERT_VALUES,SCATTER_FORWARD); if(rank==0) { int ivar;double eigen_RDM; gsl_matrix_complex_set_zero(RDM); for (int row2 = 0; row2 < dim2; ++row2) { for (int col2 = row2; col2 < dim2; ++col2) { var_tmp_gsl.dat[0] = 0.0; var_tmp_gsl.dat[1] = 0.0; for (int jjj = 0; jjj < dim; ++jjj) { ivar = row2*dim+jjj; ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr); ivar = col2*dim+jjj; ierr = VecGetValues(vectort,1,&ivar,&var_tmp2);CHKERRQ(ierr); var_tmp_gsl.dat[0] += PetscRealPart(var_tmp*PetscConj(var_tmp2)); var_tmp_gsl.dat[1] += PetscImaginaryPart(var_tmp*PetscConj(var_tmp2)); } gsl_matrix_complex_set(RDM,row2,col2,var_tmp_gsl); if (col2 != row2) { gsl_matrix_complex_set(RDM,col2,row2,gsl_matrix_complex_get(RDM,row2,col2)); } } } gsl_eigen_herm(RDM,eval_RDM,w_RDM); ALLentropy[itime] = 0; for (ivar = 0; ivar < dim2; ++ivar) { eigen_RDM = gsl_vector_get(eval_RDM, ivar); // cout << eigen_RDM << endl; ALLentropy[itime] += -eigen_RDM*log(eigen_RDM); } } // % ## density distribution of impurity fermion if(rank==0) { int ivar; for (int row2 = 0; row2 < dim2; ++row2) { for (int jpar = 0; jpar < N2; ++jpar) { double density_tmp=0; for (int jjj = 0; jjj < dim; ++jjj) { ivar = row2*dim+jjj; ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr); density_tmp +=pow(PetscAbsComplex(var_tmp),2); } /*if (itime==0) { if (rank==0) cout << "density_tmp=" << density_tmp << endl; }*/ gsl_matrix_set(density2,gsl_matrix_get(basis2,jpar,row2)-1,itime,gsl_matrix_get(density2,gsl_matrix_get(basis2,jpar,row2)-1,itime)+density_tmp); } } } /*if (rank==0) { cout << "density of impurity:" << endl; for (int jpar =0; jpar < L; ++jpar) { cout << gsl_matrix_get(density2,jpar,itime) << "\t"; } cout << endl; }*/ // % ## density distribution of majority fermions if(rank==0) { int ivar; for (int jjj = 0; jjj < dim; ++jjj) { for (int jpar = 0; jpar < N; ++jpar) { double density_tmp=0; for (int row2 = 0; row2 < dim2; ++row2) { ivar = row2*dim+jjj; ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr); density_tmp +=pow(PetscAbsComplex(var_tmp),2); } gsl_matrix_set(density1,gsl_matrix_get(basis1,jpar,jjj)-1,itime,gsl_matrix_get(density1,gsl_matrix_get(basis1,jpar,jjj)-1,itime)+density_tmp); } } } // correlation between impurity and majority fermion if(rank==0) { int ivar; double corr_tmp=0; for (int jimp=0; jimp<dim2; ++jimp) { for (int jmaj=0; jmaj<dim; ++jmaj) { for (int jpar=0; jpar<N; ++jpar) { if (gsl_matrix_get(basis1,jpar,jmaj)==jimp+1){ ivar = jimp*dim+jmaj; ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr); corr_tmp+=pow(PetscAbsComplex(var_tmp),2); } } } } gsl_vector_set(corr12,itime,corr_tmp); }// end of correlation } if (rank == 0) { char filename[50]; sprintf(filename,"measurement.data"); output.open(filename); output.is_open(); output.precision(16); for (int itime = 0; itime < Nt; ++itime) { if (itime==0) { // cout << "time t[1] " << '\t' << "departure[2] " << '\t' << "entropy[3]" << '\t' << "density of majority [L]" <<'\t' << "density of impurity [L]" << endl; } output << dt*itime-3 << '\t' << ALLdepart[itime] << '\t' << ALLentropy[itime] << '\t'; for (int jpar = 0; jpar < L; ++jpar) { output << gsl_matrix_get(density1,jpar,itime) << '\t'; } for (int jpar = 0; jpar < L; ++jpar) { output << gsl_matrix_get(density2,jpar,itime) << '\t'; } output << gsl_vector_get(corr12,itime) << '\t'; output << endl; } output.close(); } // CopyFile(source,destination,FALSE); delete[] ALLdepart; VecScatterDestroy(&ctx); VecDestroy(&vectort); gsl_matrix_complex_free(RDM); gsl_vector_free(eval_RDM); gsl_eigen_herm_free(w_RDM); gsl_matrix_free(density1); gsl_matrix_free(density2); gsl_vector_free(corr12); // CopyFile(source,destination,FALSE); return ierr; }
/* . it - column of the Hessenberg that is complete, PGMRES is actually computing two columns ahead of this */ static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) { PetscScalar *hh,*cc,*ss,*rs; PetscInt j; PetscReal hapbnd; KSP_PGMRES *pgmres = (KSP_PGMRES*)(ksp->data); PetscErrorCode ierr; PetscFunctionBegin; hh = HH(0,it); /* pointer to beginning of column to update */ cc = CC(0); /* beginning of cosine rotations */ ss = SS(0); /* beginning of sine rotations */ rs = RS(0); /* right hand side of least squares system */ /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; /* check for the happy breakdown */ hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pgmres->haptol); if (PetscAbsScalar(hh[it+1]) < hapbnd) { ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); *hapend = PETSC_TRUE; } /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), and some refs have [c s ; -conj(s) c] (don't be confused!) */ for (j=0; j<it; j++) { PetscScalar hhj = hh[j]; hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; } /* compute the new plane rotation, and apply it to: 1) the right-hand-side of the Hessenberg system (RS) note: it affects RS(it) and RS(it+1) 2) the new column of the Hessenberg matrix note: it affects HH(it,it) which is currently pointed to by hh and HH(it+1, it) (*(hh+1)) thus obtaining the updated value of the residual... */ /* compute new plane rotation */ if (!*hapend) { PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); if (delta == 0.0) { ksp->reason = KSP_DIVERGED_NULL; PetscFunctionReturn(0); } cc[it] = hh[it] / delta; /* new cosine value */ ss[it] = hh[it+1] / delta; /* new sine value */ hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; rs[it+1] = -ss[it]*rs[it]; rs[it] = PetscConj(cc[it])*rs[it]; *res = PetscAbsScalar(rs[it+1]); } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply another rotation matrix (so RH doesn't change). The new residual is always the new sine term times the residual from last time (RS(it)), but now the new sine rotation would be zero...so the residual should be zero...so we will multiply "zero" by the last residual. This might not be exactly what we want to do here -could just return "zero". */ *res = 0.0; } PetscFunctionReturn(0); }
static PetscErrorCode KSPFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res) { PetscScalar *hh,*cc,*ss,tt; PetscInt j; KSP_FGMRES *fgmres = (KSP_FGMRES*)(ksp->data); PetscFunctionBegin; hh = HH(0,it); /* pointer to beginning of column to update - so incrementing hh "steps down" the (it+1)th col of HH*/ cc = CC(0); /* beginning of cosine rotations */ ss = SS(0); /* beginning of sine rotations */ /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), and some refs have [c s ; -conj(s) c] (don't be confused!) */ for (j=1; j<=it; j++) { tt = *hh; *hh = PetscConj(*cc) * tt + *ss * *(hh+1); hh++; *hh = *cc++ * *hh - (*ss++ * tt); /* hh, cc, and ss have all been incremented one by end of loop */ } /* compute the new plane rotation, and apply it to: 1) the right-hand-side of the Hessenberg system (RS) note: it affects RS(it) and RS(it+1) 2) the new column of the Hessenberg matrix note: it affects HH(it,it) which is currently pointed to by hh and HH(it+1, it) (*(hh+1)) thus obtaining the updated value of the residual... */ /* compute new plane rotation */ if (!hapend) { tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1)); if (tt == 0.0) { ksp->reason = KSP_DIVERGED_NULL; PetscFunctionReturn(0); } *cc = *hh / tt; /* new cosine value */ *ss = *(hh+1) / tt; /* new sine value */ /* apply to 1) and 2) */ *RS(it+1) = -(*ss * *RS(it)); *RS(it) = PetscConj(*cc) * *RS(it); *hh = PetscConj(*cc) * *hh + *ss * *(hh+1); /* residual is the last element (it+1) of right-hand side! */ *res = PetscAbsScalar(*RS(it+1)); } else { /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply another rotation matrix (so RH doesn't change). The new residual is always the new sine term times the residual from last time (RS(it)), but now the new sine rotation would be zero...so the residual should be zero...so we will multiply "zero" by the last residual. This might not be exactly what we want to do here -could just return "zero". */ *res = 0.0; } PetscFunctionReturn(0); }
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma) { #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscInt i,j; PetscBLASInt *ipiv,info,n,ld,one=1,ncol; PetscScalar *A,*B,*Q,*g=gin,*ghat; PetscScalar done=1.0,dmone=-1.0,dzero=0.0; PetscReal gnorm; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); A = ds->mat[DS_MAT_A]; if (!recover) { ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr); ipiv = ds->iwork; if (!g) { ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); g = ds->work; } /* use workspace matrix W to factor A-tau*eye(n) */ ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); B = ds->mat[DS_MAT_W]; ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr); /* Vector g initialy stores b = beta*e_n^T */ ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr); g[n-1] = beta; /* g = (A-tau*eye(n))'\b */ for (i=0;i<n;i++) B[i+i*ld] -= tau; PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info)); if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization"); if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization"); ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info)); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr); /* A = A + g*b' */ for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta; } else { /* recover */ PetscValidPointer(g,6); ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); ghat = ds->work; Q = ds->mat[DS_MAT_Q]; /* g^ = -Q(:,idx)'*g */ ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr); PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one)); /* A = A + g^*b' */ for (i=0;i<ds->l+ds->k;i++) for (j=ds->l;j<ds->l+ds->k;j++) A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta; /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */ PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one)); } /* Compute gamma factor */ if (gamma) { gnorm = 0.0; for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i])); *gamma = PetscSqrtReal(1.0+gnorm); } PetscFunctionReturn(0); #endif }