PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left) { #if defined(SLEPC_MISSING_LAPACK_GESVD) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable"); #else PetscErrorCode ierr; PetscInt i,j; PetscBLASInt info,ld,n,n1,lwork,inc=1; PetscScalar sdummy,done=1.0,zero=0.0; PetscReal *sigma; PetscBool iscomplex = PETSC_FALSE; PetscScalar *A = ds->mat[DS_MAT_A]; PetscScalar *Q = ds->mat[DS_MAT_Q]; PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X]; PetscScalar *W; PetscFunctionBegin; if (left) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for left vectors"); ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); n1 = n+1; if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE; if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet"); ierr = DSAllocateWork_Private(ds,5*ld,6*ld,0);CHKERRQ(ierr); ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); W = ds->mat[DS_MAT_W]; lwork = 5*ld; sigma = ds->rwork+5*ld; /* build A-w*I in W */ for (j=0;j<n;j++) for (i=0;i<=n;i++) W[i+j*ld] = A[i+j*ld]; for (i=0;i<n;i++) W[i+i*ld] -= A[(*k)+(*k)*ld]; /* compute SVD of W */ #if !defined(PETSC_USE_COMPLEX) PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info)); #else PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info)); #endif if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info); /* the smallest singular value is the new error estimate */ if (rnorm) *rnorm = sigma[n-1]; /* update vector with right singular vector associated to smallest singular value, accumulating the transformation matrix Q */ PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc)); PetscFunctionReturn(0); #endif }
/*
   Matrix-free operators (1d Laplacian and Grad) for GLL spectral elements
*/

/*
   MatMult_Laplacian - Shell-matrix multiply: y = (scaled element Laplacian
   applied to x) divided pointwise by the mass vector.

   Input Parameters:
+  A - shell matrix whose context is the AppCtx
-  x - global input vector

   Output Parameter:
.  y - global result vector

   Notes:
   The element matrix obtained from PetscGLLElementLaplacianCreate() is
   scaled by -mu*2/Le, applied with BLAS gemv to successive windows of the
   local array (advancing N-1 entries each time), and the local
   contributions are summed into y with ADD_VALUES before the pointwise
   division by appctx->SEMop.mass.
*/
PetscErrorCode MatMult_Laplacian(Mat A,Vec x,Vec y)
{
  PetscErrorCode    ierr;
  AppCtx            *appctx;
  Vec               xlocal,ylocal;
  PetscReal         **elem,scale;
  const PetscScalar *xin;
  PetscScalar       *yout;
  PetscScalar       unity = 1;
  PetscBLASInt      stride = 1,n;
  PetscInt          r,c,start,first,count;

  ierr = MatShellGetContext(A,&appctx);CHKERRQ(ierr);

  /* bring x into a local vector */
  ierr = DMGetLocalVector(appctx->da,&xlocal);CHKERRQ(ierr);
  ierr = DMGlobalToLocalBegin(appctx->da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(appctx->da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);

  /* local accumulator for the result */
  ierr = DMGetLocalVector(appctx->da,&ylocal);CHKERRQ(ierr);
  ierr = VecSet(ylocal,0.0);CHKERRQ(ierr);

  /* element Laplacian, scaled in place by -mu*2/Le */
  ierr = PetscGLLElementLaplacianCreate(&appctx->SEMop.gll,&elem);CHKERRQ(ierr);
  for (r=0; r<appctx->param.N; r++) {
    scale = -appctx->param.mu*2.0/appctx->param.Le;
    for (c=0; c<appctx->param.N; c++) elem[r][c] *= scale;
  }

  ierr = DMDAVecGetArrayRead(appctx->da,xlocal,(void*)&xin);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(appctx->da,ylocal,&yout);CHKERRQ(ierr);
  ierr = DMDAGetCorners(appctx->da,&first,NULL,NULL,&count,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(appctx->param.N,&n);CHKERRQ(ierr);

  /* yout[start..] += elem * xin[start..]; consecutive windows share one entry */
  for (start=first; start<first+count; start += appctx->param.N-1) {
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&unity,&elem[0][0],&n,&xin[start],&stride,&unity,&yout[start],&stride));
  }

  ierr = DMDAVecRestoreArrayRead(appctx->da,xlocal,(void*)&xin);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(appctx->da,ylocal,&yout);CHKERRQ(ierr);
  ierr = PetscGLLElementLaplacianDestroy(&appctx->SEMop.gll,&elem);CHKERRQ(ierr);

  /* sum the local contributions into a zeroed global vector */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = DMLocalToGlobalBegin(appctx->da,ylocal,ADD_VALUES,y);CHKERRQ(ierr);
  ierr = DMLocalToGlobalEnd(appctx->da,ylocal,ADD_VALUES,y);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(appctx->da,&xlocal);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(appctx->da,&ylocal);CHKERRQ(ierr);

  /* apply the inverse of the mass vector entrywise */
  ierr = VecPointwiseDivide(y,y,appctx->SEMop.mass);CHKERRQ(ierr);
  return 0;
}
/*
   DSUpdateExtraRow_NHEP - Replaces the extra row of A (row index n) by its
   product with Q, i.e. row <- row*Q, and sets ds->k to n.

   Input Parameter:
.  ds - the direct solver context; A(n,0:n-1) is overwritten and 2*ld
        scalars of ds->work are used as scratch
*/
PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
{
  PetscErrorCode ierr;
  PetscBLASInt   n,ld,unit=1;
  PetscScalar    alpha=1.0,betaz=0.0;
  PetscScalar    *extra,*Q,*in,*out;
  PetscInt       col;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  extra = ds->mat[DS_MAT_A];
  extra += n;                      /* extra[col*ld] is A(n,col) */
  Q     = ds->mat[DS_MAT_Q];
  ierr = DSAllocateWork_Private(ds,2*ld,0,0);CHKERRQ(ierr);
  in  = ds->work;
  out = ds->work+ld;

  /* gather the conjugated row as a column vector */
  for (col=0;col<n;col++) in[col] = PetscConj(extra[col*ld]);

  /* out = Q^H * in */
  PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&alpha,Q,&ld,in,&unit,&betaz,out,&unit));

  /* scatter back, undoing the conjugation */
  for (col=0;col<n;col++) extra[col*ld] = PetscConj(out[col]);

  ds->k = n;
  PetscFunctionReturn(0);
}
/*
   KSPSolve_BCGSL - Solve callback of the BCGSL Krylov solver.

   Input Parameter:
.  ksp - the solver context; ksp->data holds the KSP_BCGSL state and
         ksp->work supplies the work vectors carved up below

   Notes:
   Each pass of the outer loop performs bcgsl->ell inner steps (the "BiCG
   part") followed by a "Polynomial part" that fills the small
   (ell+1) x (ell+1) matrix MZa with inner products of the residual vectors
   and solves for the coefficient vector AY0c: through an SVD-based
   pseudo-inverse when bcgsl->pinv is set, through a Cholesky factorization
   otherwise.  When bcgsl->delta > 0 the "Accurate Update" section
   periodically recomputes the residual and accumulates the solution in VXR.

   On a breakdown ksp->reason is set to a KSP_DIVERGED_* value and the
   routine returns 0 right away.

   NOTE(review): VX, VB, VRT, VTM, VXR, VVR, VVU, MZa, MZb, AY0c, AYlc and
   AYtc are not defined inside this function; presumably they are macros
   over ksp/bcgsl fields defined elsewhere in the file - confirm.
*/
static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors: three single vectors, two families of
     ell+1 vectors each, and one more single vector, all taken in order
     from ksp->work */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;

  /* leading dimension of the small dense matrices */
  ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);

  ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  /* the initial residual may already satisfy the convergence test */
  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    /* stash the initial guess in VXR and iterate on a zero VX with the
       initial residual as right-hand side */
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  /* k advances by ell per outer pass */
  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    /* negative reason: return at once; positive reason: leave the loop and
       run the common exit code below */
    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        /* track the largest residual norms seen so far */
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit; only a negative reason returns here,
         a positive reason lets the inner loop continue */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part: column i of MZa receives the i+1 dot products
       computed by VecMDot with VVR[i] */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb (MZa is overwritten by the factorizations below) */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
        /* SVD of the trailing ell x ell block of MZa (starting at entry (1,1)) */
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
# if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
# else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
# endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        /* singular values below the tolerance are skipped */
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
        /* Cholesky factorization of the trailing ell x ell block, then solve
           with MZb[1..ell] as right-hand side */
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

      /* factor the (ell-1) x (ell-1) block starting at entry (1,1) once and
         reuse it for two right-hand sides (AY0c and AYlc) */
#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      /* AYtc = MZb*AY0c; kappa0 = sqrt|AY0c . AYtc|, kappaA = AYlc . AYtc */
      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      /* AYtc = MZb*AYlc; kappa1 = sqrt|AYlc . AYtc| */
      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        /* ghat is taken as +-0.7*kappa0/kappa1 when kappaA is below
           0.7*kappa0*kappa1, and as kappaA/kappa1^2 otherwise */
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    /* omega is the last coefficient, or the highest-index nonzero one */
    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }

    /* x += sum AY0c[1..ell]*VVR[0..ell-1]; the coefficients are then negated
       for the updates of u_0 and r_0 and restored afterwards */
    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          /* fold the current VX into VXR and restart from VX = 0 with the
             recomputed residual as right-hand side */
          ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  if (bcgsl->delta>0.0) {
    /* add back the part of the solution accumulated in VXR */
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
void DenseMatrix<T>::_matvec_blas(T alpha, T beta,
                                  DenseVector<T>& dest,
                                  const DenseVector<T>& arg,
                                  bool trans) const
{
  // Computes dest <- alpha * op(A) * arg + beta * dest through the BLAS
  // gemv routine, where op(A) is A (trans == false) or A^T (trans == true).
  //
  // Size requirements:
  //   trans == false:  dest is m x 1, arg is n x 1
  //   trans == true :  dest is n x 1, arg is m x 1
  const bool sizes_ok = trans
    ? ((dest.size() == this->n()) && (arg.size() == this->m()))
    : ((dest.size() == this->m()) && (arg.size() == this->n()));

  if (!sizes_ok)
    {
      libMesh::out << "Improper input argument sizes!" << std::endl;
      libmesh_error();
    }

  // gemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
  //
  // The matrix is stored row-major while BLAS assumes column-major storage,
  // so BLAS effectively sees A^T.  Hence the flag is flipped ('t' for a
  // plain product, 'n' for a transposed one) and the dimensions are swapped.
  char op_flag[2] = { trans ? 'n' : 't', '\0' };

  int blas_rows = this->n(); // BLAS "M": our number of columns
  int blas_cols = this->m(); // BLAS "N": our number of rows
  int lead_dim  = blas_rows; // BLAS "LDA"
  int x_stride  = 1;         // BLAS "INCX"
  int y_stride  = 1;         // BLAS "INCY"

  // BLAS takes non-const pointers, so the const-ness of *this (this method
  // is called from the const vector_mult()) and of arg must be cast away.
  std::vector<T>& mat_vals = const_cast< DenseMatrix<T>& > ( *this ).get_values();
  std::vector<T>& in_vals  = const_cast< DenseVector<T>& > ( arg ).get_values();

  // Output storage; its prior contents are scaled by beta.
  std::vector<T>& out_vals = dest.get_values();

  BLASgemv_(op_flag, &blas_rows, &blas_cols, &alpha, &(mat_vals[0]), &lead_dim,
            &(in_vals[0]), &x_stride, &beta, &(out_vals[0]), &y_stride);
}
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma) { #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscInt i,j; PetscBLASInt *ipiv,info,n,ld,one=1,ncol; PetscScalar *A,*B,*Q,*g=gin,*ghat; PetscScalar done=1.0,dmone=-1.0,dzero=0.0; PetscReal gnorm; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); A = ds->mat[DS_MAT_A]; if (!recover) { ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr); ipiv = ds->iwork; if (!g) { ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); g = ds->work; } /* use workspace matrix W to factor A-tau*eye(n) */ ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); B = ds->mat[DS_MAT_W]; ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr); /* Vector g initialy stores b = beta*e_n^T */ ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr); g[n-1] = beta; /* g = (A-tau*eye(n))'\b */ for (i=0;i<n;i++) B[i+i*ld] -= tau; PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info)); if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization"); if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization"); ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info)); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr); /* A = A + g*b' */ for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta; } else { /* recover */ PetscValidPointer(g,6); ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); ghat = ds->work; Q = ds->mat[DS_MAT_Q]; /* g^ = -Q(:,idx)'*g */ ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr); 
PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one)); /* A = A + g^*b' */ for (i=0;i<ds->l+ds->k;i++) for (j=ds->l;j<ds->l+ds->k;j++) A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta; /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */ PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one)); } /* Compute gamma factor */ if (gamma) { gnorm = 0.0; for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i])); *gamma = PetscSqrtReal(1.0+gnorm); } PetscFunctionReturn(0); #endif }
/*
   DSVectors_NHEP_Eigen_Some - Computes the eigenvector selected by *k with
   LAPACK xTREVC and stores it in column *k of X (or Y when left is set).

   Input Parameters:
+  ds   - the direct solver context (A and Q are read; iwork/work/rwork are
          used as scratch)
.  k    - pointer to the index of the wanted eigenvector
-  left - PETSC_TRUE for left eigenvectors (matrix Y), otherwise right (X)

   Output Parameters:
+  k     - incremented by one when, in real arithmetic, the eigenvalue is
           part of a 2x2 block (two columns are then filled)
-  rnorm - if not NULL, receives the modulus of the last component of the
           computed vector

   Notes:
   The product with Q and the normalization are performed only when
   ds->state >= DS_STATE_CONDENSED.
*/
PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
{
#if defined(SLEPC_MISSING_LAPACK_TREVC)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscScalar    *A = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];
  PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
  PetscScalar    *vec;
  PetscScalar    scale,alpha=1.0,betaz=0.0;
  PetscReal      len;
  PetscBool      pair = PETSC_FALSE;   /* stays false in complex arithmetic */
  PetscBLASInt   *pick;
  PetscBLASInt   want=1,got,info,ld,n,unit = 1;
  PetscInt       idx,p;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);

  /* selection array for xTREVC: everything off except the wanted entry */
  ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr);
  pick = ds->iwork;
  for (p=0;p<n;p++) pick[p] = (PetscBLASInt)PETSC_FALSE;

  idx = *k;
  vec = X+idx*ld;
  pick[idx] = (PetscBLASInt)PETSC_TRUE;

#if !defined(PETSC_USE_COMPLEX)
  /* a nonzero subdiagonal entry marks a 2x2 block: select both columns */
  if (idx<n-1 && A[idx+1+idx*ld]!=0.0) {
    pair        = PETSC_TRUE;
    want        = 2;
    pick[idx+1] = (PetscBLASInt)PETSC_TRUE;
  }
  ierr = DSAllocateWork_Private(ds,3*ld,0,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",pick,&n,A,&ld,vec,&ld,vec,&ld,&want,&got,ds->work,&info));
#else
  ierr = DSAllocateWork_Private(ds,2*ld,ld,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",pick,&n,A,&ld,vec,&ld,vec,&ld,&want,&got,ds->work,ds->rwork,&info));
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
  if (got != want) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

  if (ds->state>=DS_STATE_CONDENSED) {
    /* multiply the computed column(s) by Q, going through ds->work */
    ierr = PetscMemcpy(ds->work,vec,got*ld*sizeof(PetscScalar));CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&alpha,Q,&ld,ds->work,&unit,&betaz,vec,&unit));
#if !defined(PETSC_USE_COMPLEX)
    if (pair) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&alpha,Q,&ld,ds->work+ld,&unit,&betaz,vec+ld,&unit));
#endif

    /* normalize; for a pair both columns share one scaling factor */
    len = BLASnrm2_(&n,vec,&unit);
#if !defined(PETSC_USE_COMPLEX)
    if (pair) {
      scale = BLASnrm2_(&n,vec+ld,&unit);
      len   = SlepcAbsEigenvalue(len,scale);
    }
#endif
    scale = 1.0 / len;
    PetscStackCallBLAS("BLASscal",BLASscal_(&n,&scale,vec,&unit));
#if !defined(PETSC_USE_COMPLEX)
    if (pair) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&scale,vec+ld,&unit));
#endif
  }

  /* output arguments */
  if (pair) *k = idx+1;
  if (rnorm) {
    if (pair) *rnorm = SlepcAbsEigenvalue(vec[n-1],vec[n-1+ld]);
    else *rnorm = PetscAbsScalar(vec[n-1]);
  }
  PetscFunctionReturn(0);
#endif
}
/*
   KSPSolve_BCGSL - Solve callback of the BCGSL Krylov solver.

   Input Parameter:
.  ksp - the solver context; ksp->data holds the KSP_BCGSL state and
         ksp->work supplies the work vectors carved up below

   Notes:
   Natural norm is rejected; the preconditioned norm requires left
   preconditioning and the unpreconditioned norm right preconditioning.

   Each pass of the outer loop performs bcgsl->ell inner steps (the "BiCG
   part") followed by a "Polynomial part" that fills the small
   (ell+1) x (ell+1) matrix MZa with inner products of the residual vectors
   and solves for the coefficient vector AY0c with a Cholesky factorization.

   The status of the Cholesky factorization is returned by LAPACK in bierr;
   a nonzero value is reported as KSP_DIVERGED_BREAKDOWN.

   NOTE(review): VX, VB, VRT, VTM, VXR, VVR, VVU, MZa, MZb, AY0c, AYlc and
   AYtc are not defined inside this function; presumably they are macros
   over ksp/bcgsl fields defined elsewhere in the file - confirm.
   NOTE(review): the convergence test after the main loop also runs when the
   loop was left because of a breakdown and may overwrite ksp->reason -
   confirm this is intended.
*/
static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat, epsilon, abstol;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscTruth     bUpdateX;
  PetscTruth     bBombed = PETSC_FALSE;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
  if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
  if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

  /* set up temporary vectors: three single vectors, two families of
     ell+1 vectors each, and one more single vector, all taken in order
     from ksp->work */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;

  /* leading dimension of the small dense matrices */
  ldMZ = PetscBLASIntCast(ell+1);

  /* Prime the iterative solver */
  ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs); CHKERRQ(ierr);

  ierr = VecNorm(VVR[0], NORM_2, &zeta0); CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  /* the initial residual may already satisfy the convergence test */
  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
  if (ksp->reason) {
    ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecSet(VVU[0],0.0); CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    /* stash the initial guess in VXR and iterate on a zero VX with the
       initial residual as right-hand side */
    ierr = VecCopy(VX, VXR); CHKERRQ(ierr);
    ierr = VecSet(VX,0.0); CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB); CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT); CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit); CHKERRQ(ierr);

  /* k advances by ell per outer pass */
  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    KSPLogResidualHistory(ksp, zeta);
    KSPMonitor(ksp, ksp->its, zeta);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
    if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1); CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        bBombed = PETSC_TRUE;
        break;
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]); CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM); CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma); CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        bBombed = PETSC_TRUE;
        break;
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]); CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]); CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM); CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0); CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        /* track the largest residual norms seen so far */
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr);
        ksp->its   = k+j;
        ksp->rnorm = nrm0;
        ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr);
        break;
      }
    }

    /* a breakdown inside the inner loop also ends the outer loop */
    if (bBombed==PETSC_TRUE) break;

    /* Polynomial part: column i of MZa receives the i+1 dot products
       computed by VecMDot with VVR[i] */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]); CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb (MZa is overwritten by the factorization below) */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar)); CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

      AY0c[0] = -1;
      LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
      /* LAPACK reports failure through bierr, not through ierr */
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        bBombed = PETSC_TRUE;
        break;
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar)); CHKERRQ(ierr);
      LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

      /* factor the (ell-1) x (ell-1) block starting at entry (1,1) once and
         reuse it for two right-hand sides (AY0c and AYlc) */
      LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
      /* LAPACK reports failure through bierr, not through ierr */
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        bBombed = PETSC_TRUE;
        break;
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr);
      LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr);
      LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      /* AYtc = MZb*AY0c; kappa0 = sqrt|AY0c . AYtc|, kappaA = AYlc . AYtc */
      BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

      kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = sqrt(kappa0);

      kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

      /* AYtc = MZb*AYlc; kappa1 = sqrt|AYlc . AYtc| */
      BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

      kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = sqrt(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        /* ghat is taken as +-0.7*kappa0/kappa1 when kappaA is below
           0.7*kappa0*kappa1, and as kappaA/kappa1^2 otherwise */
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    /* omega is the last coefficient, or the highest-index nonzero one */
    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
      omega = AY0c[h];
    }
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      break;
    }

    /* x += sum AY0c[1..ell]*VVR[0..ell-1]; the coefficients are then negated
       for the updates of u_0 and r_0 and restored afterwards */
    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR); CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) {
      AY0c[i] *= -1.0;
    }
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1); CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1); CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) {
      AY0c[i] *= -1.0;
    }
    ierr = VecNorm(VVR[0], NORM_2, &zeta); CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM); CHKERRQ(ierr);
        ierr = VecAYPX(VVR[0], -1.0, VB); CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          /* fold the current VX into VXR and restart from VX = 0 with the
             recomputed residual as right-hand side */
          ierr = VecAXPY(VXR,1.0,VX); CHKERRQ(ierr);
          ierr = VecSet(VX,0.0); CHKERRQ(ierr);
          ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }

  if (bcgsl->delta>0.0) {
    /* add back the part of the solution accumulated in VXR */
    ierr = VecAXPY(VX,1.0,VXR); CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}